diff --git a/.circleci/config.yml b/.circleci/config.yml
index 47f7ad9b1..785b383e1 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -2,7 +2,7 @@ version: 2.1
 jobs:
   test-local-gcc:
     machine:
-      image: ubuntu-2004:202010-01
+      image: default
     working_directory: ~/criu
     steps:
       - checkout
@@ -11,7 +11,7 @@ jobs:
           command: sudo -E make -C scripts/ci local
   test-local-clang:
     machine:
-      image: ubuntu-2004:202010-01
+      image: default
     working_directory: ~/criu
     steps:
       - checkout
diff --git a/.cirrus.yml b/.cirrus.yml
index 2b6903ddc..72dbb3898 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -13,12 +13,41 @@ task:
     nested_virtualization: true
 
   setup_script: |
-    scripts/ci/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
+    contrib/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
     sudo kvm-ok
-    ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto
   build_script: |
     make -C scripts/ci vagrant-fedora-no-vdso
 
+task:
+  name: CentOS Stream 9 based test
+  environment:
+    HOME: "/root"
+    CIRRUS_WORKING_DIR: "/tmp/criu"
+
+  compute_engine_instance:
+    image_project: centos-cloud
+    image: family/centos-stream-9
+    platform: linux
+    cpu: 4
+    memory: 8G
+
+  setup_script: |
+    dnf config-manager --set-enabled crb # Same as CentOS 8 powertools
+    dnf -y install epel-release epel-next-release
+    contrib/dependencies/dnf-packages.sh
+    # The image ships a version of nettle that is too old to work with gnutls.
+    # Upgrade it to the latest version to make the error go away.
+    dnf -y upgrade nettle nettle-devel
+    systemctl stop sssd
+    # Even with selinux in permissive mode the selinux tests will be executed.
+    # The Cirrus CI user runs as a service from selinux point of view and is
+    # much more restricted than a normal shell (system_u:system_r:unconfined_service_t:s0).
+    # The test case above (vagrant-fedora-no-vdso) should run selinux tests in enforcing mode.
+    setenforce 0
+
+  build_script: |
+    make -C scripts/ci local SKIP_CI_PREP=1 CC=gcc CD_TO_TOP=1 ZDTM_OPTS="-x zdtm/static/socket-raw"
+
 task:
   name: Vagrant Fedora Rawhide based test
   environment:
@@ -34,67 +63,39 @@ task:
     nested_virtualization: true
 
   setup_script: |
-    scripts/ci/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
+    contrib/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
     sudo kvm-ok
-    ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto
   build_script: |
     make -C scripts/ci vagrant-fedora-rawhide
 
 task:
-  name: CentOS 8 based test
+  name: Vagrant Fedora based test (non-root)
   environment:
     HOME: "/root"
     CIRRUS_WORKING_DIR: "/tmp/criu"
 
   compute_engine_instance:
-    image_project: centos-cloud
-    image: family/centos-stream-8
+    image_project: cirrus-images
+    image: family/docker-kvm
     platform: linux
     cpu: 4
-    memory: 8G
+    memory: 16G
+    nested_virtualization: true
 
   setup_script: |
-    ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto
-    yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm dnf-plugins-core
-    yum config-manager --set-enabled powertools
-    yum install -y --allowerasing asciidoc gcc git gnutls-devel libaio-devel libasan libcap-devel libnet-devel libnl3-devel libbsd-devel libselinux-devel make protobuf-c-devel protobuf-devel python3-devel python3-flake8 python3-PyYAML python3-future python3-protobuf xmlto
-    alternatives --set python /usr/bin/python3
-    systemctl stop sssd
-    # Even with selinux in permissive mode the selinux tests will be executed
-    # The Cirrus CI user runs as a service from selinux point of view and is
-    # much more restricted than a normal shell (system_u:system_r:unconfined_service_t:s0)
-    # The test case above (vagrant-fedora-no-vdso) should run selinux tests in enforcing mode
-    setenforce 0
-    pip3 install junit_xml
-
+    contrib/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
+    sudo kvm-ok
   build_script: |
-    make -C scripts/ci local SKIP_CI_PREP=1 CC=gcc CD_TO_TOP=1 ZDTM_OPTS="-x zdtm/static/socket-raw"
+    make -C scripts/ci vagrant-fedora-non-root
 
 task:
-  name: CentOS 7 based test
-  environment:
-    HOME: "/root"
-    CIRRUS_WORKING_DIR: "/tmp/criu"
-
-  compute_engine_instance:
-    image_project: centos-cloud
-    image: family/centos-7
-    platform: linux
+  name: aarch64 Fedora Rawhide
+  arm_container:
+    image: registry.fedoraproject.org/fedora:rawhide
     cpu: 4
-    memory: 8G
-
-  setup_script: |
-    ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto
-    yum install -y findutils gcc git gnutls-devel iproute iptables libaio-devel libasan libcap-devel libnet-devel libnl3-devel libbsd-devel make procps-ng protobuf-c-devel protobuf-devel protobuf-python python python-flake8 python-ipaddress python2-future python2-junit_xml python-yaml python-six sudo tar which e2fsprogs python2-pip rubygem-asciidoctor libselinux-devel
-    # Even with selinux in permissive mode the selinux tests will be executed
-    # The Cirrus CI user runs as a service from selinux point of view and is
-    # much more restricted than a normal shell (system_u:system_r:unconfined_service_t:s0)
-    # The test case above (vagrant-fedora-no-vdso) should run selinux tests in enforcing mode
-    setenforce 0
-    # Enable user namespaces on CentOS 7
-    echo 10000 > /proc/sys/user/max_user_namespaces
-    # Adapt sudoers to our needs
-    echo 'root ALL=(ALL:ALL) ALL' | EDITOR='tee -a' visudo
-
+    memory: 4G
+  script: uname -a
   build_script: |
-    make -C scripts/ci local SKIP_CI_PREP=1 CC=gcc CD_TO_TOP=1 ZDTM_IGNORE_TAINT=1 ZDTM_OPTS="-x zdtm/static/socket-raw -x zdtm/static/child_subreaper_existing_child -x zdtm/static/fifo_upon_unix_socket01 -x zdtm/static/overmount_sock -x zdtm/static/tempfs_overmounted"
+    scripts/ci/prepare-for-fedora-rawhide.sh
+    make -C scripts/ci/ local CC=gcc SKIP_CI_PREP=1 SKIP_CI_TEST=1 CD_TO_TOP=1
+    make -C test/zdtm -j 4
diff --git a/.clang-format b/.clang-format
index 96ba5909f..fb40bc613 100644
--- a/.clang-format
+++ b/.clang-format
@@ -53,7 +53,7 @@ BreakConstructorInitializersBeforeComma: false
 BreakConstructorInitializers: BeforeComma # Unknown to clang-format-4.0
 BreakAfterJavaFieldAnnotations: false
 BreakStringLiterals: false
-ColumnLimit: 120
+ColumnLimit: 0
 CommentPragmas: '^ IWYU pragma:'
 CompactNamespaces: false # Unknown to clang-format-4.0
 ConstructorInitializerAllOnOneLineOrOnePerLine: false
@@ -71,6 +71,7 @@ FixNamespaceComments: false # Unknown to clang-format-4.0
 #   | sort | uniq
 ForEachMacros:
   - 'for_each_pstree_item'
+  - 'for_each_bit'
   - 'apei_estatus_for_each_section'
   - 'ata_for_each_dev'
   - 'ata_for_each_link'
@@ -515,6 +516,7 @@ IncludeCategories:
     Priority: 1
 IncludeIsMainRegex: '(Test)?$'
 IndentCaseLabels: false
+IndentGotoLabels: false
 IndentPPDirectives: None # Unknown to clang-format-5.0
 IndentWidth: 8
 IndentWrappedFunctionNames: false
diff --git a/.codespellrc b/.codespellrc
index 765dacfab..5def594b2 100644
--- a/.codespellrc
+++ b/.codespellrc
@@ -1,3 +1,3 @@
 [codespell]
-skip = ./.git,./test/pki
-ignore-words-list = creat,fpr,fle,ue,bord,parms,nd,te,testng
+skip = ./.git,./test/pki,./tags,./plugins/amdgpu/amdgpu_drm.h,./plugins/amdgpu/drm.h,./plugins/amdgpu/drm_mode.h
+ignore-words-list = creat,fpr,fle,ue,bord,parms,nd,te,testng,inh,wronly,renderd,bui,clen,sems
diff --git a/.drone.yml b/.drone.yml
deleted file mode 100644
index 07eb8be65..000000000
--- a/.drone.yml
+++ /dev/null
@@ -1,82 +0,0 @@
----
-kind: pipeline
-type: docker
-name: aarch64 build GCC (native)
-
-platform:
-  os: linux
-  arch: arm64
-
-steps:
-- name: build
-  image: ubuntu:focal
-  commands:
-  - scripts/ci/apt-install make
-  - make -C scripts/ci local
-
----
-kind: pipeline
-type: docker
-name: aarch64 build CLANG (native)
-
-platform:
-  os: linux
-  arch: arm64
-
-steps:
-- name: build
-  image: ubuntu:focal
-  commands:
-  - scripts/ci/apt-install make
-  - make -C scripts/ci local CLANG=1
-
----
-kind: pipeline
-type: docker
-name: armhf build GCC (native)
-
-platform:
-  os: linux
-  arch: arm
-
-steps:
-- name: build
-  # At the time of setting up focal did not work
-  image: ubuntu:bionic
-  commands:
-  - scripts/ci/apt-install make
-  - make -C scripts/ci local
-
----
-kind: pipeline
-type: docker
-name: armhf build CLANG (native)
-
-platform:
-  os: linux
-  arch: arm
-
-steps:
-- name: build
-  # At the time of setting up focal did not work
-  image: ubuntu:bionic
-  commands:
-  - scripts/ci/apt-install make
-  - make -C scripts/ci local CLANG=1
-
----
-kind: pipeline
-type: docker
-name: aarch64 Fedora Rawhide
-
-platform:
-  os: linux
-  arch: arm64
-
-steps:
-- name: build
-  image: registry.fedoraproject.org/fedora:rawhide
-  commands:
-  - scripts/ci/prepare-for-fedora-rawhide.sh
-  - make -C scripts/ci/ local CC=gcc SKIP_CI_PREP=1 SKIP_CI_TEST=1 CD_TO_TOP=1
-  - make -C test/zdtm -j 4
diff --git a/.github/workflows/aarch64-test.yaml b/.github/workflows/aarch64-test.yaml
new file mode 100644
index 000000000..ebbecadb3
--- /dev/null
+++ b/.github/workflows/aarch64-test.yaml
@@ -0,0 +1,34 @@
+name: aarch64 test
+
+on: [push, pull_request]
+
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: aarch64-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+  build:
+    strategy:
+      matrix:
+        os: [ubuntu-24.04-arm, ubuntu-22.04-arm]
+        target: [GCC=1, CLANG=1]
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Run Tests ${{ matrix.target }} on ${{ matrix.os }}
+      # The following tests are failing on the VMs:
+      #  ./change_mnt_context --pidfile=change_mnt_context.pid --outfile=change_mnt_context.out
+      #   45: ERR: change_mnt_context.c:23: mount (errno = 22 (Invalid argument))
+      #
+      # In combination with '--remote-lazy-pages' the following error occurs:
+      #  138: FAIL: maps05.c:84: Data corrupted at page 1639 (errno = 11 (Resource temporarily unavailable))
+      run: |
+        # The 'sched_policy00' test needs the following:
+        sudo sysctl -w kernel.sched_rt_runtime_us=-1
+        # An /etc/hosts entry is needed for netns_lock_iptables
+        echo "127.0.0.1   localhost" | sudo tee -a /etc/hosts
+        sudo -E make -C scripts/ci local ${{ matrix.target }} RUN_TESTS=1 \
+          ZDTM_OPTS="-x zdtm/static/change_mnt_context -x zdtm/static/maps05"
diff --git a/.github/workflows/alpine-test.yml b/.github/workflows/alpine-test.yml
index 6fc546ff5..0f5c20f48 100644
--- a/.github/workflows/alpine-test.yml
+++ b/.github/workflows/alpine-test.yml
@@ -2,14 +2,20 @@ name: Alpine Test
 
 on: [push, pull_request]
 
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: alpine-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
 jobs:
   build:
-    runs-on: ubuntu-20.04
     strategy:
       matrix:
+        os: [ubuntu-22.04, ubuntu-22.04-arm]
         target: [GCC=1, CLANG=1]
+    runs-on: ${{ matrix.os }}
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Run Alpine ${{ matrix.target }} Test
       run: sudo -E make -C scripts/ci alpine ${{ matrix.target }}
diff --git a/.github/workflows/archlinux-test.yml b/.github/workflows/archlinux-test.yml
index bb98623a8..425f0662b 100644
--- a/.github/workflows/archlinux-test.yml
+++ b/.github/workflows/archlinux-test.yml
@@ -2,10 +2,15 @@ name: Arch Linux Test
 
 on: [push, pull_request]
 
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: archlinux-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Run Arch Linux Test
       run: sudo -E make -C scripts/ci archlinux
diff --git a/.github/workflows/check-commits.yml b/.github/workflows/check-commits.yml
new file mode 100644
index 000000000..bf7d06697
--- /dev/null
+++ b/.github/workflows/check-commits.yml
@@ -0,0 +1,31 @@
+name: Verify self-contained commits
+
+on: pull_request
+
+# Cancel any preceding run on the pull request
+concurrency:
+  group: commit-test-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    # Check if pull request does not have label "not-selfcontained-ok"
+    if: "!contains(github.event.pull_request.labels.*.name, 'not-selfcontained-ok')"
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        # Needed to rebase against the base branch
+        fetch-depth: 0
+        # Checkout pull request HEAD commit instead of merge commit
+        ref: ${{ github.event.pull_request.head.sha }}
+    - name: Install dependencies
+      run: sudo contrib/apt-install libprotobuf-dev libprotobuf-c-dev protobuf-c-compiler protobuf-compiler python3-protobuf libnl-3-dev libnet-dev libcap-dev uuid-dev
+    - name: Configure git user details
+      run: |
+        git config --global user.email "checkpoint-restore@users.noreply.github.com"
+        git config --global user.name "checkpoint-restore"
+    - name: Configure base branch without switching current branch
+      run: git fetch origin ${{ github.base_ref }}:${{ github.base_ref }}
+    - name: Build each commit
+      run: git rebase ${{ github.base_ref }} -x "make -C scripts/ci check-commit"
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
new file mode 100644
index 000000000..9c9e46c1b
--- /dev/null
+++ b/.github/workflows/codeql.yml
@@ -0,0 +1,50 @@
+name: "CodeQL"
+
+on:
+  push:
+    branches: [ "criu-dev", "master" ]
+  pull_request:
+    branches: [ "criu-dev" ]
+  schedule:
+    - cron: "11 6 * * 3"
+
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: codeql-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+  analyze:
+    name: Analyze
+    runs-on: ubuntu-latest
+    permissions:
+      actions: read
+      contents: read
+      security-events: write
+
+    strategy:
+      fail-fast: false
+      matrix:
+        language: [ python, cpp ]
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install Packages (cpp)
+        if: ${{ matrix.language == 'cpp' }}
+        run: |
+          sudo contrib/apt-install protobuf-c-compiler libprotobuf-c-dev libprotobuf-dev build-essential libprotobuf-dev libprotobuf-c-dev protobuf-c-compiler protobuf-compiler python3-protobuf libnet-dev pkg-config libnl-3-dev libbsd0 libbsd-dev iproute2 libcap-dev libaio-dev libbsd-dev python3-yaml libnl-route-3-dev gnutls-dev
+      - name: Initialize CodeQL
+        uses: github/codeql-action/init@v3
+        with:
+          languages: ${{ matrix.language }}
+          queries: +security-and-quality
+
+      - name: Autobuild
+        uses: github/codeql-action/autobuild@v3
+
+      - name: Perform CodeQL Analysis
+        uses: github/codeql-action/analyze@v3
+        with:
+          category: "/language:${{ matrix.language }}"
diff --git a/.github/workflows/compat-test.yml b/.github/workflows/compat-test.yml
index 5ae25fb73..8a64ce185 100644
--- a/.github/workflows/compat-test.yml
+++ b/.github/workflows/compat-test.yml
@@ -2,15 +2,20 @@ name: Compat Tests
 
 on: [push, pull_request]
 
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: compat-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
         target: [GCC, CLANG]
 
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Run Compat Tests (${{ matrix.target }})
       run: sudo -E make -C scripts/ci local COMPAT_TEST=y ${{ matrix.target }}=1
diff --git a/.github/workflows/cross-compile-daily.yml b/.github/workflows/cross-compile-daily.yml
index 927ddced2..c709cca00 100644
--- a/.github/workflows/cross-compile-daily.yml
+++ b/.github/workflows/cross-compile-daily.yml
@@ -10,11 +10,11 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        target: [armv7-stable-cross, aarch64-stable-cross, ppc64-stable-cross, mips64el-stable-cross]
+        target: [armv7-stable-cross, aarch64-stable-cross, ppc64-stable-cross, mips64el-stable-cross, riscv64-stable-cross]
         branches: [criu-dev, master]
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
       with:
         ref: ${{ matrix.branches }}
     - name: Run Cross Compilation Targets
diff --git a/.github/workflows/cross-compile.yml b/.github/workflows/cross-compile.yml
index be8e7f09c..96672b294 100644
--- a/.github/workflows/cross-compile.yml
+++ b/.github/workflows/cross-compile.yml
@@ -2,6 +2,11 @@ name: Cross Compile Tests
 
 on: [push, pull_request]
 
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: cross-compile-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
 jobs:
   build:
 
@@ -16,6 +21,7 @@ jobs:
           aarch64-stable-cross,
           ppc64-stable-cross,
           mips64el-stable-cross,
+          riscv64-stable-cross,
         ]
         include:
           - experimental: true
@@ -28,7 +34,7 @@ jobs:
             target: mips64el-unstable-cross
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Run Cross Compilation Targets
       run: >
         sudo make -C scripts/ci ${{ matrix.target }}
diff --git a/.github/workflows/docker-test.yml b/.github/workflows/docker-test.yml
index 564691449..23696905a 100644
--- a/.github/workflows/docker-test.yml
+++ b/.github/workflows/docker-test.yml
@@ -2,13 +2,18 @@ name: Docker Test
 
 on: [push, pull_request]
 
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: docker-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
 jobs:
   build:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-20.04]
+        os: [ubuntu-22.04]
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Run Docker Test (${{ matrix.os }})
       run: sudo make -C scripts/ci docker-test
diff --git a/.github/workflows/fedora-asan-test.yml b/.github/workflows/fedora-asan-test.yml
index 44b0f16d6..02dc9a1b3 100644
--- a/.github/workflows/fedora-asan-test.yml
+++ b/.github/workflows/fedora-asan-test.yml
@@ -2,11 +2,16 @@ name: Fedora ASAN Test
 
 on: [push, pull_request]
 
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: fedora-asan-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Run Fedora ASAN Test
       run: sudo -E make -C scripts/ci fedora-asan
diff --git a/.github/workflows/fedora-rawhide-test.yml b/.github/workflows/fedora-rawhide-test.yml
index 00bc3b2bd..83e2ead82 100644
--- a/.github/workflows/fedora-rawhide-test.yml
+++ b/.github/workflows/fedora-rawhide-test.yml
@@ -2,11 +2,20 @@ name: Fedora Rawhide Test
 
 on: [push, pull_request]
 
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: fedora-rawhide-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Run Fedora Rawhide Test
-      run: sudo -E make -C scripts/ci fedora-rawhide CONTAINER_RUNTIME=podman BUILD_OPTIONS="--security-opt seccomp=unconfined"
+      # We need to pass environment variables from the CI environment to
+      # distinguish between CI environments. However, we need to make sure that
+      # XDG_RUNTIME_DIR environment variable is not set due to a bug in Podman.
+      # FIXME: https://github.com/containers/podman/issues/14920
+      run: sudo -E XDG_RUNTIME_DIR= make -C scripts/ci fedora-rawhide CONTAINER_RUNTIME=podman BUILD_OPTIONS="--security-opt seccomp=unconfined"
diff --git a/.github/workflows/gcov-test.yml b/.github/workflows/gcov-test.yml
index f782c5b9d..cc4e1d44a 100644
--- a/.github/workflows/gcov-test.yml
+++ b/.github/workflows/gcov-test.yml
@@ -2,12 +2,17 @@ name: Coverage Tests
 
 on: [push, pull_request]
 
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: gcov-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Run Coverage Tests
       run: sudo -E make -C scripts/ci local GCOV=1
     - name: Run gcov
diff --git a/.github/workflows/java-test.yml b/.github/workflows/java-test.yml
new file mode 100644
index 000000000..cbd3c1f23
--- /dev/null
+++ b/.github/workflows/java-test.yml
@@ -0,0 +1,16 @@
+name: Java Test
+
+on: [push, pull_request]
+
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: java-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+  build:
+    runs-on: ubuntu-22.04
+    steps:
+    - uses: actions/checkout@v4
+    - name: Run Java Test
+      run: sudo make -C scripts/ci java-test
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index c3886c707..f7da4f6f6 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -2,6 +2,11 @@ name: Run code linter
 
 on: [push, pull_request]
 
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: lint-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
 jobs:
   build:
     runs-on: ubuntu-latest
@@ -9,9 +14,9 @@ jobs:
       image: registry.fedoraproject.org/fedora:latest
     steps:
     - name: Install tools
-      run: sudo dnf -y install git make python3-flake8 ShellCheck clang-tools-extra which findutils codespell
+      run: sudo dnf -y install git make ruff xz clang-tools-extra codespell git-clang-format ShellCheck
 
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
 
     - name: Set git safe directory
       # https://github.com/actions/checkout/issues/760
@@ -21,13 +26,15 @@ jobs:
       run: make lint
 
     - name: Run make indent
-      run: >
-        make indent &&
-        STATUS=$(git status --porcelain) &&
-        if [ ! -z "$STATUS" ]; then
-          echo "FAIL: some files are not correctly formatted.";
-          echo "$STATUS"
-          git diff
-          echo "FAIL: please run 'make indent'";
-          exit 1;
+      continue-on-error: true
+      run: |
+        if [ -z "${{github.base_ref}}" ]; then
+          git fetch --deepen=1
+          make indent
+        else
+          git fetch origin ${{github.base_ref}}
+          make indent BASE=origin/${{github.base_ref}}
         fi
+    - name: Raise in-line make indent warnings
+      run: |
+        git diff | ./scripts/github-indent-warnings.py
diff --git a/.github/workflows/loongarch64-qemu-test.yml b/.github/workflows/loongarch64-qemu-test.yml
new file mode 100644
index 000000000..d7c554c87
--- /dev/null
+++ b/.github/workflows/loongarch64-qemu-test.yml
@@ -0,0 +1,15 @@
+name: LoongArch64 Qemu Test
+
+on: [push, pull_request]
+
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: loongarch64-qemu-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+  build:
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+      - run: sudo make -C scripts/ci loongarch64-qemu-test
diff --git a/.github/workflows/manage-labels.yml b/.github/workflows/manage-labels.yml
new file mode 100644
index 000000000..a2bcd8860
--- /dev/null
+++ b/.github/workflows/manage-labels.yml
@@ -0,0 +1,14 @@
+name: Remove labels
+on: [issue_comment, pull_request_review_comment]
+jobs:
+  remove-labels-on-comments:
+    name: Remove labels on comments
+    if: github.event_name == 'issue_comment'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: mondeja/remove-labels-gh-action@v1
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          labels: |
+            changes requested
+            awaiting reply
diff --git a/.github/workflows/nftables-test.yml b/.github/workflows/nftables-test.yml
new file mode 100644
index 000000000..7a7d8bd30
--- /dev/null
+++ b/.github/workflows/nftables-test.yml
@@ -0,0 +1,24 @@
+name: Nftables bases testing
+
+on: [push, pull_request]
+
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: nftables-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+  build:
+    runs-on: ubuntu-24.04
+    steps:
+    - uses: actions/checkout@v4
+    - name: Remove iptables
+      run: sudo apt remove -y iptables
+    - name: Install libnftables-dev
+      run: sudo contrib/apt-install libnftables-dev
+    - name: chmod 755 /home/runner
+      # CRIU's tests are sometimes running as some random user and need
+      # to be able to access the test files.
+      run: sudo chmod 755 /home/runner
+    - name: Build with nftables network locking backend
+      run: sudo make -C scripts/ci local COMPILE_FLAGS="NETWORK_LOCK_DEFAULT=NETWORK_LOCK_NFTABLES"
diff --git a/.github/workflows/openj9-test.yml b/.github/workflows/openj9-test.yml
deleted file mode 100644
index 1d7a1eb6b..000000000
--- a/.github/workflows/openj9-test.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-name: OpenJ9 Test
-
-on: [push, pull_request]
-
-jobs:
-  build:
-    runs-on: ubuntu-20.04
-    steps:
-    - uses: actions/checkout@v2
-    - name: Run OpenJ9 Test
-      run: sudo make -C scripts/ci openj9-test
diff --git a/.github/workflows/podman-test.yml b/.github/workflows/podman-test.yml
index 447cbf0b6..a07edbe5b 100644
--- a/.github/workflows/podman-test.yml
+++ b/.github/workflows/podman-test.yml
@@ -2,10 +2,15 @@ name: Podman Test
 
 on: [push, pull_request]
 
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: podman-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Run Podman Test
       run: sudo make -C scripts/ci podman-test
diff --git a/.github/workflows/stream-test.yml b/.github/workflows/stream-test.yml
index ecdd81e0a..76bd96edf 100644
--- a/.github/workflows/stream-test.yml
+++ b/.github/workflows/stream-test.yml
@@ -2,11 +2,16 @@ name: CRIU Image Streamer Test
 
 on: [push, pull_request]
 
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: stream-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Run CRIU Image Streamer Test
       run: sudo -E make -C scripts/ci local STREAM_TEST=1
diff --git a/.github/workflows/x86-64-clang-test.yml b/.github/workflows/x86-64-clang-test.yml
index e6e84ef52..1f0a469bd 100644
--- a/.github/workflows/x86-64-clang-test.yml
+++ b/.github/workflows/x86-64-clang-test.yml
@@ -2,10 +2,15 @@ name: X86_64 CLANG Test
 
 on: [push, pull_request]
 
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: clang-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Run X86_64 CLANG Test
       run: sudo make -C scripts/ci x86_64 CLANG=1
diff --git a/.github/workflows/x86-64-gcc-test.yml b/.github/workflows/x86-64-gcc-test.yml
index b8b81ef15..15e84a0df 100644
--- a/.github/workflows/x86-64-gcc-test.yml
+++ b/.github/workflows/x86-64-gcc-test.yml
@@ -2,10 +2,15 @@ name: X86_64 GCC Test
 
 on: [push, pull_request]
 
+# Cancel any preceding run on the pull request.
+concurrency:
+  group: gcc-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Run X86_64 GCC Test
       run: sudo make -C scripts/ci x86_64
diff --git a/.gitignore b/.gitignore
index d5135f5f8..94daa13ea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,26 +20,16 @@ compel/compel
 compel/compel-host-bin
 images/*.c
 images/*.h
-images/google/protobuf/*.c
-images/google/protobuf/*.h
 .gitid
 criu/criu
 criu/unittest/unittest
-crit/crit
-criu/arch/*/sys-exec-tbl*.c
-# x86 syscalls-table is not generated
-!criu/arch/x86/sys-exec-tbl.c
-criu/arch/*/syscalls*.S
-criu/include/syscall-codes*.h
-criu/include/syscall*.h
 criu/include/version.h
 criu/pie/restorer-blob.h
 criu/pie/parasite-blob.h
 criu/protobuf-desc-gen.h
 lib/build/
 lib/c/criu.pc
-lib/.crit-setup.files
 compel/include/asm
 include/common/asm
 include/common/config.h
-build/
+build/**
diff --git a/.lgtm.yml b/.lgtm.yml
index a884a53ef..4beadcc63 100644
--- a/.lgtm.yml
+++ b/.lgtm.yml
@@ -22,10 +22,4 @@ extraction:
       - "libbsd-dev"
       - "python3-yaml"
       - "libnl-route-3-dev"
-      - "python-future"
       - "gnutls-dev"
-    configure:
-      command:
-      - "ls -laR images/google"
-      - "ln -s /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto"
-      - "ls -laR images/google"
diff --git a/.mailmap b/.mailmap
index 6f046b972..8076f0bc9 100644
--- a/.mailmap
+++ b/.mailmap
@@ -6,3 +6,5 @@ Andrei Vagin <avagin@gmail.com> <avagin@virtuozzo.com>
 Andrei Vagin <avagin@gmail.com> <avagin@odin.com>
 Andrei Vagin <avagin@gmail.com> <avagin@google.com>
 Cyrill Gorcunov <gorcunov@openvz.org> <gorcunov@gmail.com>
+Alexander Mikhalitsyn <alexander@mihalicyn.com> <alexander.mikhalitsyn@virtuozzo.com>
+Alexander Mikhalitsyn <alexander@mihalicyn.com> <aleksandr.mikhalitsyn@canonical.com>
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 94841b3f3..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-language: c
-os: linux
-dist: bionic
-services:
-  - docker
-jobs:
-  include:
-    - os: linux
-      arch: ppc64le
-      env: TR_ARCH=local
-      dist: bionic
-    - os: linux
-      arch: ppc64le
-      env: TR_ARCH=local CLANG=1
-      dist: bionic
-    - os: linux
-      arch: s390x
-      env: TR_ARCH=local
-      dist: bionic
-    - os: linux
-      arch: arm64-graviton2
-      env: TR_ARCH=local RUN_TESTS=1
-      dist: focal
-      group: edge
-      virt: vm
-    - os: linux
-      arch: arm64-graviton2
-      env: TR_ARCH=local CLANG=1 RUN_TESTS=1
-      group: edge
-      virt: vm
-      dist: bionic
-script:
-  - sudo make -C scripts/ci $TR_ARCH
-after_success:
-  - make -C scripts/ci after_success
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 120000
index 000000000..e3c5a92d9
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1 @@
+GEMINI.md
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 864caf93e..03875639d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -8,8 +8,8 @@ Here are some useful hints to get involved.
 * We have both -- [very simple](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks;
 * CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting);
 * Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles;
-* Feedback is expected on the GitHub issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu);
-* We accept GitHub pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [CRIU development mailing list](https://lists.openvz.org/mailman/listinfo/criu).
+* Feedback is expected on the GitHub issues page and on the [mailing list](https://lore.kernel.org/criu);
+* We accept GitHub pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [CRIU development mailing list](https://lore.kernel.org/criu).
 Below we describe in more detail recommend practices for CRIU development.
 * Spread the word about CRIU in [social networks](http://criu.org/Contacts);
 * If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events);
@@ -27,54 +27,137 @@ The repository may contain multiple branches. Development happens in the **criu-
 To clone CRIU repo and switch to the proper branch, run:
 
 ```
-        git clone https://github.com/checkpoint-restore/criu criu
-        cd criu
-        git checkout criu-dev
+git clone https://github.com/checkpoint-restore/criu criu
+cd criu
+git checkout criu-dev
 ```
 
-### Compile
+### Building from source
 
-First, you need to install compile-time dependencies. Check [Installation dependencies](https://criu.org/Installation#Dependencies) for more info.
+Follow these steps to compile CRIU from source code.
 
-To compile CRIU, run:
+#### Installing build dependencies
+
+First, you need to install the required build dependencies. We provide scripts to simplify this process for several Linux distributions in [contrib/dependencies](contrib/dependencies). For a complete list of dependencies, please refer to the [installation guide](https://criu.org/Installation).
+
+##### On Ubuntu/Debian-based systems:
 
 ```
-        make
+./contrib/dependencies/apt-packages.sh
+```
+
+##### On Fedora/CentOS-based systems:
+
+```
+./contrib/dependencies/dnf-packages.sh
+```
+
+##### Using Nix:
+
+```
+nix develop
+```
+
+#### Compiling CRIU
+
+Once the dependencies are installed, you can compile CRIU by running the `make` command from the root of the source directory:
+
+```
+make
 ```
 
 This should create the `./criu/criu` executable.
 
 ## Edit the source code
 
-If you use ctags, you can generate the ctags file by running
-
-```
-        make tags
-```
-
 When you change the source code, please keep in mind the following code conventions:
 
+* code is written to be read, so the code readability is the most important thing you need to have in mind when preparing patches
 * we prefer tabs and indentations to be 8 characters width
-* CRIU mostly follows [Linux kernel coding style](https://www.kernel.org/doc/Documentation/process/coding-style.rst), but we are less strict than the kernel community.
+* we prefer line length of 80 characters or less, more is allowed if it helps with code readability
+* CRIU mostly follows [Linux kernel coding style](https://www.kernel.org/doc/Documentation/process/coding-style.rst), but we are less strict than the kernel community
 
-Other conventions can be learned from the source code itself. In short, make sure your new code
-looks similar to what is already there.
+Other conventions can be learned from the source code itself. In short, make sure your new code looks similar to what is already there.
+
+## Automatic tools to fix coding-style
+
+Important: These tools are there to advise you, but should not be considered as a "source of truth", as tools also make nasty mistakes from time to time which can completely break code readability.
+
+The following command can be used to automatically run a code linter for Python files (ruff), Shell scripts (shellcheck),
+text spelling (codespell), and a number of CRIU-specific checks (usage of print macros and EOL whitespace for C files).
+
+```
+make lint
+```
+
+In addition, we have adopted a [clang-format configuration file](https://www.kernel.org/doc/Documentation/process/clang-format.rst)
+based on the kernel source tree. However, compliance with the clang-format autoformat rules is optional. If the automatic code formatting
+results in decreased readability, we may choose to ignore these errors.
+
+Run the following command to check if your changes are compliant with the clang-format rules:
+
+```
+make indent
+```
+
+This command is built upon the `git-clang-format` tool and supports two options `BASE` and `OPTS`. The `BASE` option allows you to
+specify a range of commits to check for coding style issues. By default, it is set to `HEAD~1`, so that only the last commit is checked.
+If you are developing on top of the criu-dev branch and want to check all your commits for compliance with the clang-format rules, you
+can use `BASE=origin/criu-dev`. The `OPTS` option can be used to pass additional options to `git-clang-format`. For example, if you want
+to check the last *N* commits for formatting errors, without applying the changes to the codebase you can use the following command.
+
+```
+make indent OPTS=--diff BASE=HEAD~N
+```
+
+Note that for pull requests, the "Run code linter" workflow runs these checks for all commits. If a clang-format error is detected
+we need to review the suggested changes and decide if they should be fixed before merging.
+
+Here are some bad examples of clang-format-ing:
+
+* if clang-format tries to force 120 characters and breaks readability - it is wrong:
+
+```
+@@ -58,8 +59,7 @@ static int register_membarriers(void)
+         }
+
+         if (!all_ok) {
+-                fail("can't register membarrier()s - tried %#x, kernel %#x",
+-                     barriers_registered, barriers_supported);
++                fail("can't register membarrier()s - tried %#x, kernel %#x", barriers_registered, barriers_supported);
+                 return -1;
+         }
+```
+
+* if clang-format breaks your beautiful readability-friendly alignment in structures, comments or defines - it is wrong:
+
+```
+--- a/test/zdtm/static/membarrier.c
++++ b/test/zdtm/static/membarrier.c
+@@ -27,9 +27,10 @@ static const struct {
+        int register_cmd;
+        int execute_cmd;
+ } membarrier_cmds[] = {
+-       { "",           MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED,           MEMBARRIER_CMD_PRIVATE_EXPEDITED },
+-       { "_SYNC_CORE", MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE },
+-       { "_RSEQ",      MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ,      MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ },
++       { "", MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, MEMBARRIER_CMD_PRIVATE_EXPEDITED },
++       { "_SYNC_CORE", MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE,
++         MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE },
++       { "_RSEQ", MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ },
+ };
+```
 
 ## Test your changes
 
 CRIU comes with an extensive test suite. To check whether your changes introduce any regressions, run
 
 ```
-         make test
+make test
 ```
 
 The command runs [ZDTM Test Suite](https://criu.org/ZDTM_Test_Suite). Check for any error messages produced by it.
 
-In case you'd rather have someone else run the tests, you can use travis-ci for your
-own GitHub fork of CRIU. It will check the compilation for various supported platforms,
-as well as run most of the tests from the suite. See https://travis-ci.org/checkpoint-restore/criu
-for more details.
-
 ## Describe your changes
 
 Describe your problem.  Whether your change is a one-line bug fix or
@@ -102,21 +185,21 @@ If your change fixes a bug in a specific commit, e.g. you found an issue using
 the SHA-1 ID, and the one line summary. For example:
 
 ```
-	Fixes: 9433b7b9db3e ("make: use cflags/ldflags for config.h detection mechanism")
+Fixes: 9433b7b9db3e ("make: use cflags/ldflags for config.h detection mechanism")
 ```
 
 The following `git config` settings can be used to add a pretty format for
 outputting the above style in the `git log` or `git show` commands:
 
 ```
-	[pretty]
-		fixes = Fixes: %h (\"%s\")
+[pretty]
+    fixes = Fixes: %h (\"%s\")
 ```
 
 If your change address an issue listed in GitHub, please use `Fixes:` tag with the number of the issue. For instance:
 
 ```
-	Fixes: #339
+Fixes: #339
 ```
 
 The `Fixes:` tags should be put at the end of the detailed description.
@@ -199,7 +282,7 @@ can certify the below:
 then you just add a line saying
 
 ```
-        Signed-off-by: Random J Developer <random at developer.example.org>
+Signed-off-by: Random J Developer <random at developer.example.org>
 ```
 
 using your real name (please, no pseudonyms or anonymous contributions if
@@ -211,14 +294,14 @@ commit message. To append such line to a commit you already made, use
 
 ```
  From: Random J Developer <random at developer.example.org>
- Subject: [PATCH] component: Short patch description
+Subject: [PATCH] component: Short patch description
 
- Long patch description (could be skipped if patch
- is trivial enough)
+Long patch description (could be skipped if patch
+is trivial enough)
 
- Signed-off-by: Random J Developer <random at developer.example.org>
- ---
- Patch body here
+Signed-off-by: Random J Developer <random at developer.example.org>
+---
+Patch body here
 ```
 
 ## Submit your work upstream
@@ -252,8 +335,8 @@ contains the following:
   revisions should be listed. For example:
 
 ```
-	v3: rebase on the current criu-dev
-	v2: add commit to foo() and update bar() coding style
+v3: rebase on the current criu-dev
+v2: add commit to foo() and update bar() coding style
 ```
 
 If there are only minor updates to the commits in a pull request, it is
@@ -271,7 +354,7 @@ Historically, CRIU worked with mailing lists and patches so if you still prefer
 To create a patch, run
 
 ```
-    git format-patch --signoff origin/criu-dev
+git format-patch --signoff origin/criu-dev
 ```
 
 You might need to read GIT documentation on how to prepare patches
@@ -282,8 +365,8 @@ at all.
 We recommend to post patches using `git send-email`
 
 ```
-  git send-email --cover-letter --no-chain-reply-to --annotate \
-                 --confirm=always --to=criu@openvz.org criu-dev
+git send-email --cover-letter --no-chain-reply-to --annotate \
+               --confirm=always --to=criu@lists.linux.dev criu-dev
 ```
 
 Note that the `git send-email` subcommand may not be in
@@ -295,14 +378,14 @@ If this is your first time using git send-email, you might need to
 configure it to point it to your SMTP server with something like:
 
 ```
-    git config --global sendemail.smtpServer stmp.example.net
+git config --global sendemail.smtpServer smtp.example.net
 ```
 
-If you get tired of typing `--to=criu@openvz.org` all the time,
+If you get tired of typing `--to=criu@lists.linux.dev` all the time,
 you can configure that to be automatically handled as well:
 
 ```
-    git config sendemail.to criu@openvz.org
+git config sendemail.to criu@lists.linux.dev
 ```
 
 If a developer is sending another version of the patch (e.g. to address
@@ -315,7 +398,7 @@ version if needed though).
 
 ### Mail patches
 
-The patches should be sent to CRIU development mailing list, `criu AT openvz.org`. Note that you need to be subscribed first in order to post. The list web interface is available at https://openvz.org/mailman/listinfo/criu; you can also use standard mailman aliases to work with it.
+The patches should be sent to CRIU development mailing list, `criu AT lists.linux.dev`. Note that you need to be subscribed first in order to post. The list web interface is available at https://lore.kernel.org/criu; you can also use standard mailman aliases to work with it.
 
 Please make sure the email client you're using doesn't screw your patch (line wrapping and so on).
 
@@ -332,5 +415,3 @@ sometimes a patch may fly around a week before it gets reviewed.
 Wiki article: [Continuous integration](https://criu.org/Continuous_integration)
 
 CRIU tests are run for each series sent to the mailing list. If you get a message from our patchwork that patches failed to pass the tests, you have to investigate what is wrong.
-
-We also recommend you to [enable Travis CI for your repo](https://criu.org/Continuous_integration#Enable_Travis_CI_for_your_repo) to check patches in your git branch, before sending them to the mailing list.
diff --git a/Documentation/Makefile b/Documentation/Makefile
index 508551450..de0cc448d 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -12,11 +12,9 @@ endif
 
 FOOTER		:= footer.txt
 SRC1		+= crit.txt
-ifeq ($(PYTHON),python3)
 SRC1		+= criu-ns.txt
-endif
 SRC1		+= compel.txt
-SRC1            += amdgpu_plugin.txt
+SRC1		+= criu-amdgpu-plugin.txt
 SRC8		+= criu.txt
 SRC		:= $(SRC1) $(SRC8)
 XMLS		:= $(patsubst %.txt,%.xml,$(SRC))
diff --git a/Documentation/compel.txt b/Documentation/compel.txt
index a44ca22c6..506228f59 100644
--- a/Documentation/compel.txt
+++ b/Documentation/compel.txt
@@ -97,7 +97,10 @@ Following steps are performed to infect the victim process:
     - execute system call: *int compel_syscall(ctl, int syscall_nr, long *ret, int arg ...);*
     - infect victim: *int compel_infect(ctl, nr_thread, size_of_args_area);*
     - cure the victim: *int compel_cure(ctl);* //ctl pointer is freed by this call
-    - Resume victim: *int compel_resume_task(pid, orig_state, state);*
+    - Resume victim: *int compel_resume_task(pid, orig_state, state)* or
+    *int compel_resume_task_sig(pid, orig_state, state, stop_signo).*
+    //compel_resume_task_sig() could be used in case when victim is in stopped state.
+    stop_signo could be read by calling compel_parse_stop_signo().
 
 *ctl* must be configured with blob information by calling *PREFIX_setup_c_header()*, with ctl as its argument.
 *PREFIX* is the argument given to *-p* when calling hgen, else it is deduced from file name.
diff --git a/Documentation/amdgpu_plugin.txt b/Documentation/criu-amdgpu-plugin.txt
similarity index 82%
rename from Documentation/amdgpu_plugin.txt
rename to Documentation/criu-amdgpu-plugin.txt
index 0d490b429..fe76fc3bc 100644
--- a/Documentation/amdgpu_plugin.txt
+++ b/Documentation/criu-amdgpu-plugin.txt
@@ -3,7 +3,7 @@ ROCM Support(1)
 
 NAME
 ----
-amdgpu_plugin - A plugin extension to CRIU to support checkpoint/restore in
+criu-amdgpu-plugin - A plugin extension to CRIU to support checkpoint/restore in
 userspace for AMD GPUs.
 
 
@@ -15,6 +15,7 @@ Checkpoint / Restore inside a docker container
 Pytorch
 Tensorflow
 Using CRIU Image Streamer
+Parallel Restore
 
 DESCRIPTION
 -----------
@@ -22,19 +23,15 @@ Though *criu* is a great tool for checkpointing and restoring running
 applications, it has certain limitations such as it cannot handle
 applications that have device files open. In order to support *ROCm* based
 workloads with *criu* we need to augment criu's core functionality with a
-plugin based extension mechanism. *amdgpu_plugin* provides the necessary support
+plugin based extension mechanism. *criu-amdgpu-plugin* provides the necessary support
 to criu to allow Checkpoint / Restore with ROCm.
 
 
 Dependencies
-~~~~~~~~~~~~~~
+------------
 *amdkfd support*::
     In order to snapshot the *VRAM* and other *GPU* device states, we require
-    an updated version of amdkfd(amdgpu) driver. The kernel patches are under
-    review currently.
-
-*criu 3.16*::
-    This work is rebased on latest criu release available at this time.
+    an updated version of amdkfd(amdgpu) driver.
 
 OPTIONS
 -------
@@ -97,6 +94,15 @@ executing criu command.
     E.g:
     KFD_CAPABILITY_CHECK=1
 
+*KFD_MAX_BUFFER_SIZE*::
+    On some systems, VRAM sizes may exceed RAM sizes, and so buffers for dumping
+    and restoring VRAM may be unable to fit. Set to a nonzero value (in bytes)
+    to set a limit on the plugin's memory usage.
+    Default:0 (Disabled)
+
+    E.g:
+    KFD_MAX_BUFFER_SIZE="2G"
+
 
 AUTHOR
 ------
diff --git a/Documentation/criu.txt b/Documentation/criu.txt
index 8b128f63e..0c9a9e527 100644
--- a/Documentation/criu.txt
+++ b/Documentation/criu.txt
@@ -155,6 +155,17 @@ not compatible with *--external* *dev*.
             notification message contains a file descriptor for
             the master pty
 
+        *query-ext-files*:::
+            called after the process tree is stopped and network is locked.
+            This hook is used only in the RPC mode. The notification reply
+            contains file ids to be added to external file list (may be empty).
+
+*--unprivileged*::
+    This option tells *criu* to accept the limitations when running
+    as non-root. Running as non-root requires *criu* at least to have
+    *CAP_SYS_ADMIN* or *CAP_CHECKPOINT_RESTORE*. For details about running
+    *criu* as non-root please consult the *NON-ROOT* section.
+
 *-V*, *--version*::
     Print program version and exit.
 
@@ -378,6 +389,13 @@ mount -t cgroup -o devices,freezer none devices,freezer
     'size' may be postfixed with a *K*, *M* or *G*, which stands for kilo-,
     mega, and gigabytes, accordingly.
 
+*--ghost-fiemap*::
+    Enable an optimization based on fiemap ioctl that can reduce the
+    number of system calls used when checkpointing highly sparse ghost
+    files. This option is enabled by default, and it can be disabled
+    with *--no-ghost-fiemap*. An automatic fallback to SEEK_HOLE/SEEK_DATA
+    is used when fiemap is not supported.
+
 *-j*, *--shell-job*::
     Allow one to dump shell jobs. This implies the restored task will
     inherit session and process group ID from the *criu* itself.
@@ -444,6 +462,33 @@ The 'mode' may be one of the following:
 
     *nftables*::: Use nftables rules to drop the packets.
 
+    *skip*::: Don't lock the network. If *--tcp-close* is not used, the network
+    must be locked externally to allow CRIU to dump TCP connections.
+
+*--allow-uprobes*::
+    Allow dumping when uprobes vma is present. When used on dump, this option is
+    required on restore as well.
+
+    A uprobes vma is automatically created by the kernel once a uprobe is
+    triggered. This mapping is not removed even once the uprobe is deleted. So,
+    even if a process once had uprobes attached to it, and they're removed by
+    the time the process is dumped, this option is still required because criu
+    has no way of knowing whether there are active uprobes or not.
+
+    When using this option on restore, make sure the uprobes (if any) active on
+    the dumped processes are still active. Otherwise, when execution reaches
+    a uprobe'd location in any of the restored processes, that process will be
+    sent a SIGTRAP.
+
+    As an example, say a uprobe is set at function foo in the executable of the
+    process p_bar. Whenever execution in p_bar reaches function foo, the uprobe
+    is triggered. If the uprobe has been triggered at least once, then the kernel
+    will have created the uprobes vma. To dump p_bar, this option is
+    necessary. After dumping, say the uprobe is deleted. Now, on restoring with
+    this option, once execution reaches function foo, SIGTRAP will be sent to
+    the restored p_bar. Unless it has a signal handler installed for SIGTRAP,
+    it will be terminated and core dumped.
+
 *restore*
 ~~~~~~~~~
 Restores previously checkpointed processes.
@@ -457,8 +502,8 @@ Restores previously checkpointed processes.
 The 'resource' argument can be one of the following:
 +
     - **tty[**__rdev__**:**__dev__**]**
-    - **pipe[**__inode__**]**
-    - **socket[**__inode__*]*
+    - **pipe:[**__inode__**]**
+    - **socket:[**__inode__**]**
     - **file[**__mnt_id__**:**__inode__**]**
     - 'path/to/file'
 
@@ -668,6 +713,13 @@ The 'mode' may be one of the following:
                 build-ID cannot be obtained, 'chksm-first' method will be
                 used. This is the default if mode is unspecified.
 
+*--skip-file-rwx-check*::
+    Skip checking file permissions (r/w/x for u/g/o) on restore.
+
+*--allow-uprobes*::
+    Required when dumped with this option. Refer to this option in the section
+    on dumping for more details.
+
 *check*
 ~~~~~~~
 Checks whether the kernel supports the features needed by *criu* to
@@ -874,6 +926,42 @@ configuration file will overwrite all other configuration file settings
 or RPC options. *This can lead to undesired behavior of criu and
 should only be used carefully.*
 
+NON-ROOT
+--------
+*criu* can be used as non-root with either the *CAP_SYS_ADMIN* capability
+or with the *CAP_CHECKPOINT_RESTORE* capability introduced in Linux kernel 5.9.
+*CAP_CHECKPOINT_RESTORE* is the minimum that is required.
+
+*criu* also needs either *CAP_SYS_PTRACE* or a value of 0 in
+*/proc/sys/kernel/yama/ptrace_scope* (see *ptrace*(2)) to be able to interrupt
+the process for dumping.
+
+Running *criu* as non-root has many limitations and depending on the process
+to checkpoint and restore it may not be possible.
+
+In addition to *CAP_CHECKPOINT_RESTORE* it is possible to give *criu* additional
+capabilities to enable additional features in non-root mode.
+
+Currently *criu* can benefit from the following additional capabilities:
+
+    - *CAP_NET_ADMIN*
+    - *CAP_SYS_CHROOT*
+    - *CAP_SETUID*
+    - *CAP_SYS_RESOURCE*
+
+Note that for some operations, having a capability in a namespace other than
+the init namespace (i.e. the default/root namespace) is not sufficient. For
+example, in order to read symlinks in /proc/[pid]/map_files CRIU requires
+CAP_CHECKPOINT_RESTORE in the init namespace; having CAP_CHECKPOINT_RESTORE
+while running in another user namespace (e.g. in a container) does not allow
+CRIU to read symlinks in /proc/[pid]/map_files.
+
+Without access to /proc/[pid]/map_files checkpointing/restoring processes
+that have mapped deleted files may not be possible.
+
+Independent of the capabilities it is always necessary to use "*--unprivileged*" to
+accept *criu*'s limitations in non-root mode.
+
 EXAMPLES
 --------
 To checkpoint a program with pid of *1234* and write all image files into
diff --git a/Documentation/logo.svg b/Documentation/logo.svg
new file mode 100644
index 000000000..f713e72b7
--- /dev/null
+++ b/Documentation/logo.svg
@@ -0,0 +1,136 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Generator: Adobe Illustrator 16.0.1, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
+	 width="560px" height="560px" viewBox="0 0 560 560" enable-background="new 0 0 560 560" xml:space="preserve">
+<path opacity="0.3" fill="#990000" d="M315.137,360.271c-18.771-7.159-41.548-8.85-68.479-8.85c-16.661,0-46.255,2.939-74.654,3.38
+	c11.209-4.884,20.734-10.265,24.842-16.87c14.531-23.346,17.645-65.893,17.645-65.893l-20.758,3.114c0,0-2.591,35.8-16.085,47.733
+	c-5.35,4.736-15.96,7.834-27.916,10.856c2.447-26.071,29.477-57.552,29.477-57.552l-14.874-3.966l-5.88-7.448
+	c0,0-3.011,1.761-7.588,5.315c-18.298,4.208-75.946,20.443-75.946,57.983c0,15.292,5.77,26.308,14.768,34.244
+	c-22.858,26.966-20.755,61.618-20.755,61.618s-8.945,16.61-8.021,31.254c2.083,32.973,34.931,25.097,44.313,26.374
+	c9.644,1.313,34.313-4.18,34.313-4.18s-16.276-2.639-15.329-18.562c0.5-8.369-0.947-27.628-21.404-37.307
+	c-1.13-10.066,2.111-18.309,6.379-28.015c18.452,45.263,92.601,53.97,92.601,53.97c0.393-0.097-10.269,20.047,0.221,35.632
+	c4.652,6.915,18.284,10.019,22.436,19.356c4.151,9.341,2.199,30.354,2.199,30.354s21.267-16.864,27.239-30.18
+	c3.334-7.432,25.989,0.926,25.989-34.047c0-14.077-12.26-26.841-13.675-29.815c-20.858-20.334-5.427-4.743,2.677-8.236
+	c12.758-5.499,35.412,11.657,35.412,11.657s-10.402-20.119-11.437-31.013c-0.795-8.335-4.537-16.816-16.624-30.042
+	c7.166-0.752,20.362,2.327,20.362,2.327s-5.202,11.251-0.879,25.515c3.588,11.84,7.193,7.193,14.736,14.737
+	c6.599,6.598,3.146,26.284,3.146,26.284s4.674-4.513,18.081-18.235c9.072-9.29,23.645-16.717,23.645-47.86
+	C355.312,365.969,334.97,360.979,315.137,360.271z M134.108,285.901c-11.5,13.048-23.667,32.329-28.23,58.293
+	c-4.821-3.519-7.613-8.1-7.613-14.043C98.265,309.699,117.078,295.016,134.108,285.901z"/>
+<path fill="#990000" d="M382.184,115.435c3.654,1.208,7.327,2.37,10.968,3.444c14.16,4.183,26.745-9.798,26.745-9.798
+	s-8.785-2.243-17.857-3.497c12.173-2.653,21.085-18.66,21.085-18.66s-17.366,4.819-27.224,5.087
+	c-2.042,0.057-4.107,0.118-6.189,0.186c2.464-0.37,4.925-0.847,7.361-1.485c14.201-3.714,21.505-23.382,21.505-23.382
+	s-15.411,6.743-24.951,9.239c-2.694,0.703-5.438,1.437-8.197,2.185c3.038-1.071,6.008-2.306,8.815-3.82
+	c12.922-6.965,12.241-29.347,12.241-29.347s-10.162,11.926-18.844,16.605c-3.557,1.916-7.199,3.904-10.846,5.911
+	c3.798-2.277,7.45-4.743,10.596-7.569c10.918-9.814,7.722-29.605,7.722-29.605s-9.801,12.54-17.135,19.131
+	c-8.939,8.037-18.775,14.104-27.014,21.81c-6.427,6.011-25.14,35.236-36.812,46.283c-11.671,11.047-18.301,12.476-19.159,14.388
+	c-0.863,1.913,1.006,30.46-14.078,39.145c-16.476-21.583-50.565-44.007-53.101-72.033c-2.079-22.959,5.209-34.055,19.149-35.316
+	c14.994-1.359,15.998,24.507,15.998,24.507s-1.379,1.064-1.708,6.391c-0.097,0.629-0.145,1.272-0.083,1.934
+	c0.004,0.031,0.008,0.06,0.011,0.091c-0.014,1.674,0.065,3.664,0.278,6.039c1.131,12.474,4.53,14.574,4.53,14.574l2.075-0.722
+	c0,0-2.24-4.079-2.554-7.529c-0.172-1.917-0.187-3.556-0.079-4.977c0.45,0.067,0.949,0.081,1.506,0.031
+	c4.398-0.399,6.049-4.141,5.65-8.539c-0.042-0.45-0.069-0.885-0.094-1.316c2.485-26.032-1.756-29.637,4.788-41.391
+	c9.032-16.218,17.279-16.015,17.279-16.015l1.402-8.155c0,0-6.817,2.462-14.819,13.652c-8.833,12.354-8.983,26.229-9.066,47.958
+	c-0.188-0.761-0.502-1.37-1.017-1.784c-2.457-11.192-9.087-32.13-24.112-30.77c-16.72,1.514-29.419,14.974-26.773,44.171
+	c3.609,39.832,26.186,52.701,29.829,80.84c-13.47-2.349-23.883-10.656-30.866-20.282c-7.803-10.749-7.297-22.949-8.324-24.779
+	c-1.027-1.829-7.761-2.662-20.367-12.627c-12.605-9.965-33.845-37.41-40.78-42.824c-8.895-6.942-19.229-12.111-28.848-19.32
+	c-7.892-5.915-18.769-17.531-18.769-17.531s-1.419,19.995,10.323,28.8c3.386,2.536,7.246,4.665,11.229,6.597
+	c-3.808-1.674-7.616-3.33-11.327-4.925c-9.062-3.887-20.246-14.861-20.246-14.861s1.31,22.353,14.803,28.143
+	c2.931,1.257,6,2.223,9.12,3.019c-2.818-0.5-5.615-0.985-8.357-1.447c-9.728-1.636-25.677-6.981-25.677-6.981
+	s9.025,18.94,23.5,21.376c2.485,0.417,4.975,0.674,7.466,0.822c-2.08,0.118-4.148,0.242-6.183,0.368
+	c-9.843,0.61-27.566-2.645-27.566-2.645S85.667,120.333,110,120c-8.922,2.057-25.678,6.008-25.678,6.008s13.778,12.806,27.508,7.38
+	c3.533-1.394,7.087-2.876,10.62-4.404c-3.726,1.804-7.424,3.581-11.005,5.273c-8.963,4.243-19.428,10.176-19.428,10.176
+	s15.069,9.759,27.305,1.497c0.558-0.378,3.121-1.76,3.678-2.143c-7.904,5.808-19.754,14.937-19.754,14.937
+	s15.802,6.027,27.092-3.354c4.663-3.875,8.104-7.185,12.238-11.618c-3.773,4.55-6.699,8.018-10.634,12.106
+	c-6.839,7.104-13.06,19.791-13.06,19.791s15.597,0.39,24.359-11.388c4.488-6.035,7.482-11.633,10.974-18.191
+	c-3.113,6.479-5.468,11.95-8.911,17.788c-5.018,8.49-7.574,22.624-7.574,22.624s15.342-3.655,21.07-17.17
+	c2.231-5.266,2.107-9.783,3.694-15.291c-1.257,5.272-0.666,9.475-2.24,14.319c-3.045,9.379,0.011,25.554,0.011,25.554
+	s9.713-5.855,10.359-20.52c0.006-0.153,0.5-8.47,0.5-8.625L171,171.496c0,9.917,6.295,23.276,6.295,23.276
+	s11.459-10.649,9.369-25.266c-0.188-1.31-0.1-2.627-0.305-3.947c0.408,1.507,0.998,3.016,1.493,4.524
+	c3.075,9.429,3.5,15.957,3.5,15.957s6.483,1.251,8.73-1.594c0.764,5.625-0.843,10.2-0.843,10.2s5.471-1.1,8.893-3.756
+	c0.705,5.331,0.155,8.789,0.155,8.789s5.106-1.603,8.419-4.323c0.611,4.642,1.764,7.542,1.764,7.542s6.398-0.88,9.021-5.393
+	c0.199,0.038,0.395,0.079,0.59,0.117c2.269,4.875,1.438,8.517,1.438,8.517s7.492-2.14,9.492-6.14c0.003,0,0.007,0,0.01,0
+	c1.798,4,2.727,6.102,2.727,6.102s4.853-2.349,7.093-6.064c0.189,0.009,0.364-0.093,0.547-0.086
+	c-4.702,19.629-23.62,29.658-42.207,42.764c-1.392,0.981-2.712,1.925-3.97,2.884c-2.891,1.512-6.788,3.495-11.311,5.724
+	c-9.829,3.363-23.7,6.057-41.038,4.084c-9.798-1.115-21.037,10.02-21.037,10.02s6.87,4.843,16.565,5.028
+	c-8.819,3.621-17.438,12.632-17.438,12.632s0.045,0.019,0.069,0.029c-27.096,11.688-51.621,29.917-47.651,57.105
+	c2.375,16.27,14.692,25.475,31.704,30.254c-17.81,14.742-32.921,36.129-30.707,60.59c0.134,1.487,0.309,2.916,0.508,4.311
+	c-2.209,5.6-3.288,17.842-2.674,24.886c0.949,10.838,13.686,8.662,18.219,6.729c14.139,12.202,32.258,10.252,32.258,10.252
+	s-17.301,1.211-30.306-11.156c5.551-2.659,6.424-3.925,6.788-11.579c0.36-7.61-9.104-20.759-20.57-21.966
+	c-1.25-20.07,9.861-43.32,30.603-60.203c0.02,0.249,0.023,0.491,0.048,0.742c4.248,46.957,30.584,54.634,81.148,63.26
+	c12.603,2.15,22.04,5.821,29.042,10.457c-3.844,5.388-5.706,21.559-2.895,32.325c3.045,11.655,12.647,14.53,19.429,14.955
+	c-3.304,16.035-11.235,29.024-11.235,29.024s10.015-11.628,15.04-29.016c0.48-0.031,0.928-0.069,1.319-0.114
+	c10.922-1.262,16.17-11.338,14.743-23.071c-1.195-9.826-13.974-24.54-28.598-25.992c-33.117-21.52-109.104-9.05-113.877-61.769
+	c-0.341-3.746-0.517-7.367-0.571-10.888c5.709,1.111,11.782,1.844,18.104,2.244c14.111,28.517,62.158,22.269,95.818,20.694
+	c1.764,3.09,7.043,7.064,13.929,9.779c11.751,4.633,14.889,3.742,18.869,1.502c1.484-0.835,2.828-1.92,3.979-3.155
+	c10.822,10.456,25.37,30.251,25.37,30.251s-12.29-22.284-22.733-33.97c2.601-4.923,2.433-10.619-2.559-13.297
+	c-6.956-3.732-31.321,1.581-36.316,4.981c-30.811,1.668-71.853,6.551-89.576-16.474c41.005,1.192,88.786-9.133,102.385-10.365
+	c21.726-1.966,47.319,1.367,64.887,8.228c-0.783,5.681,1.867,18.47,4.641,25.318c3.316,8.197,11.561,5.887,16.562,3.028
+	c-0.588,13.3-4.495,22.638-4.495,22.638s7.86-14.125,9.117-26.183c4.354-4.041,4.774-5.562,2.904-12.887
+	c-1.849-7.24-14.317-16.821-25.47-15.096c-21.855-8.906-54.594-11.087-75.74-9.175c-18.253,1.653-61.404,10.802-97.611,10.237
+	c-1.895-3.338-3.402-7.122-4.412-11.479c5.113-2.364,10.551-4.388,16.307-5.975c30.999-8.551,40.97-29.258,42.943-48.579
+	c1.127,1.303,1.938,2.069,1.938,2.069s7.087-12.679,5.522-27.275c-0.264-2.469-0.429-4.737-0.553-6.911
+	c2.499,6.741,7.778,13.001,7.778,13.001s16.438-20.208,5.846-27.268c-11.583-7.714-6.836-13.283-4.31-15.299
+	c3.354-1.984,6.973-3.94,10.859-5.817c26.561-12.817,59.903-20.002,64.443-40.039c0.265-1.172,0.388-2.34,0.443-3.507
+	c3.701,2.396,9.165,2.053,9.165,2.053s-0.367-2.88-0.601-7.556c3.747,2.081,8.874,1.758,8.874,1.758s-0.986-2.319-1.255-7.689
+	c3.846,1.998,8.434,2.278,8.434,2.278s-0.725-2.246-1.24-5.573c3.788,0.719,8.84,0.419,8.84,0.419s-3.543-7.302-1.316-16.965
+	c0.357-1.547,0.666-3.09,0.938-4.626c-0.087,1.332-0.169,2.662-0.238,3.985c-0.783,14.742,10.85,24.47,10.85,24.47
+	S337,172.178,337,162.303c0-0.021,0-0.042,0-0.061c0,0.153-0.804,0.309-0.782,0.46c1.951,14.548,13.499,20.839,13.499,20.839
+	s2.388-16.471-1.478-25.542c-1.998-4.686-3.966-9.742-5.688-14.881c2.068,5.344,4.374,10.673,7.067,15.72
+	c6.909,12.952,20.498,15.406,20.498,15.406s-1.832-14.029-7.581-22.041c-3.952-5.505-7.874-11.654-11.551-17.83
+	c4.059,6.22,8.622,12.438,13.631,18.048c9.774,10.953,25.27,9.178,25.27,9.178s-7.323-12.085-14.767-18.552
+	c-4.283-3.722-8.589-7.824-12.754-12.019c4.513,4.047,9.319,7.944,14.31,11.39c12.077,8.341,27.281,0.931,27.281,0.931
+	s-10.533-7.219-18.926-12.302c0.595,0.332,1.186,0.662,1.777,0.988c12.922,7.14,28.146-3.013,28.146-3.013
+	s-12.036-5.887-21.343-9.313C389.896,118.341,386.055,116.903,382.184,115.435z M116.917,367.418
+	c-0.172,0.131-0.344,0.268-0.516,0.398c-17.301-3.899-29.646-12.415-31.124-28.752c-2.244-24.777,21.669-42.631,47.562-54.59
+	c3.553,1,9.203,1.919,15.541,0.503c-4.694,4.817-7.998,9.859-7.998,9.859s2.076,0.564,5.3,0.733
+	C133.582,308.673,115.917,333.715,116.917,367.418z M146.295,295.598c1.834,0.062,3.979-0.014,6.326-0.386
+	c-0.141,0.365-0.274,0.72-0.401,1.069c-10.511,14.57-18.745,34.363-17.404,59.912c-4.522,2.267-9.248,5.074-13.939,8.343
+	C122.237,330.3,136.218,307.613,146.295,295.598z M121.776,368.86c4.131-2.979,8.589-5.697,13.361-8.115
+	c0.358,3.527,1.032,6.741,2.025,9.634C131.805,370.131,126.629,369.657,121.776,368.86z M150.478,350.278
+	c-3.791,0.864-8.16,2.403-12.812,4.546c-0.062-0.425-0.168-0.803-0.224-1.236c-2.557-19.875,3.873-37.276,13.005-51.347
+	c0,0.005-0.007,0.032-0.007,0.032s13.533-3.395,23.088-14.017c-1.715,7.205,0.158,14.79,0.158,14.79s9.774-5.185,16.654-15.216
+	c-0.131,5.548,2.84,10.803,5.451,14.331C193.303,321.731,182.711,342.934,150.478,350.278z M259.516,275.357
+	c0.846-4.127,1.649-8.135,2.42-12.012c2.199-4.002,5.203-6.524,9.011-7.55c3.808-1.04,7.78-1.559,11.919-1.559l1.739-17.042
+	c-5.942,0.378-11.657,1.419-17.144,3.105c-5.492,1.672-10.946,3.611-16.369,5.8c-4.526,4.131-7.915,8.875-10.169,14.237
+	c-2.262,5.359-3.755,11.051-4.655,17.055c-0.906,6.007-1.268,12.17-1.268,18.489v18.209c0,3.23,0.201,6.368,0.779,9.393
+	c0.584,3.045,1.728,5.66,3.543,7.85c3.614,2.588,7.203,3.85,10.822,3.771c3.619-0.066,7.224-0.712,10.842-1.925
+	c3.611-1.23,7.162-2.757,10.647-4.558c3.484-1.811,6.904-3.293,10.266-4.457l7.159-14.521c-2.066,0.505-4.2,1.23-6.394,2.127
+	c-2.199,0.9-4.453,1.643-6.777,2.224c-2.322,0.585-4.649,0.773-6.977,0.585c-2.322-0.189-4.649-1.2-6.976-2.994
+	c-2.063-3.626-3.355-7.475-3.87-11.541c-0.519-4.065-0.612-8.165-0.289-12.296C258.1,283.619,258.674,279.488,259.516,275.357z
+	 M367.6,320.582c-0.196-3.025-1.001-5.908-2.42-8.623c-1.031-3.608-2.649-6.588-4.846-8.905c-2.193-2.333-4.682-4.162-7.458-5.516
+	c-2.773-1.358-5.712-2.364-8.812-3.014c-3.098-0.643-6.004-1.056-8.717-1.259c-2.711-0.188-5.101-0.285-7.166-0.285
+	s-3.419-0.062-4.064-0.189c0.25-1.037,0.449-2.302,0.574-3.783c0.133-1.481,0.322-2.866,0.584-4.162
+	c0.258-1.419,0.512-2.977,0.773-4.65c6.326,0,12.073-0.581,17.242-1.749c5.165-1.148,9.688-3.059,13.558-5.705
+	c3.876-2.646,7.135-6.131,9.781-10.469c2.649-4.318,4.558-9.583,5.715-15.776c-5.684,0-11.596,0.029-17.727,0.093
+	s-12.328,0.158-18.593,0.284c-6.266,0.143-12.431,0.332-18.5,0.583c-6.066,0.27-11.812,0.584-17.236,0.979
+	c0.128,0,0.221,1.387,0.293,4.161c0.062,2.775,0.062,6.465,0,11.035c-0.072,4.588-0.2,9.788-0.386,15.589
+	c-0.199,5.819-0.49,11.73-0.875,17.734c-0.386,6.007-0.878,11.901-1.451,17.72c-0.584,5.815-1.262,10.908-2.035,15.304
+	c5.552-0.268,11.432-0.488,17.624-0.677c2.162-0.065,4.33-0.127,6.503-0.176l1.247-5.547c0.385-2.192,0.708-4.776,0.969-7.739
+	c0.259-2.979,0.513-5.754,0.773-8.338c0.259-3.093,0.386-6.196,0.386-9.286c0.646-0.127,1.677-0.206,3.103-0.206
+	c1.547,0,3.225,0.269,5.039,0.773c1.804,0.519,3.68,1.292,5.612,2.334c1.938,1.041,3.615,2.522,5.034,4.46
+	c1.42,1.925,2.45,4.352,3.104,7.252c0.638,2.914,0.638,6.495,0,10.75l0.631,5.39c1.609,0.033,3.207,0.079,4.796,0.144
+	c6.068,0.189,11.812,0.471,17.234,0.866C367.891,326.747,367.795,323.609,367.6,320.582z M327.506,263.345
+	c0.707-4.397,1.323-8.133,1.835-11.238c1.168-0.521,2.522-0.835,4.069-0.962c1.549-0.125,3.103-0.205,4.65-0.205
+	c1.677,0,3.291,0.031,4.845,0.112c1.547,0.062,2.901,0.093,4.069,0.093c0,1.151-0.041,2.586-0.103,4.256
+	c-0.066,1.688-0.189,3.42-0.389,5.232c-0.189,1.815-0.512,3.578-0.97,5.331c-0.446,1.732-1.127,3.182-2.034,4.347
+	c-0.896,0.918-2.128,1.657-3.681,2.224c-1.543,0.584-3.159,1.042-4.84,1.357c-1.677,0.33-3.291,0.55-4.838,0.677
+	c-1.555,0.141-2.78,0.207-3.682,0.207C326.439,271.542,326.798,267.727,327.506,263.345z M393.035,246.385
+	c-2.517,0.33-4.84,0.584-6.97,0.773c-2.135,0.205-3.781,0.172-4.939-0.096l3.678,2.711c0.899,5.423,1.356,11.051,1.356,16.851
+	c0,5.818-0.195,11.695-0.584,17.642c-0.385,5.941-0.872,11.805-1.45,17.624c-0.581,5.801-1,11.427-1.261,16.85
+	c-0.907,4.522-1.519,9.238-1.835,14.139c-0.331,4.901-0.843,9.713-1.554,14.425c-0.708,4.712-1.812,9.3-3.297,13.761
+	c-1.48,4.443-3.773,8.481-6.869,12.107l-2.908,1.543c0.513,0.52,1.323,0.993,2.42,1.45c1.093,0.457,1.842,0.678,2.23,0.678
+	c2.708-3.23,4.712-6.558,6.004-9.978c1.286-3.419,2.64-6.746,4.069-9.963c1.544-2.711,2.969-5.626,4.261-8.716
+	c1.286-3.107,2.774-6.008,4.455-8.719c1.671-2.708,3.681-5.045,6.008-6.984c2.322-1.938,5.285-3.15,8.903-3.67
+	c0.386-6.319,0.836-13.114,1.354-20.335c0.517-7.235,1.001-14.534,1.451-21.896c0.457-7.361,0.846-14.596,1.168-21.689
+	c0.323-7.111,0.482-13.684,0.482-19.769c-2.713,0-5.458,0.143-8.229,0.394C398.196,245.785,395.553,246.07,393.035,246.385z
+	 M483.002,245c0,4-0.061,5.618-0.188,7.038c-0.135,1.419-0.323,3.525-0.581,5.259c-0.261,1.751-0.584,4.166-0.972,6.752
+	c-0.386,2.584-0.843,6.388-1.354,11.165c-0.519,4.791-1.135,11.551-1.839,19.167c-0.715,7.612-1.519,18.619-2.427,29.619h-32.15
+	c0-15,1.065-26.686,3.192-39.535c2.138-12.847,4.101-25.911,5.911-38.695c-5.034,0.52-9.85,1.042-14.427,1.812
+	c-4.589,0.773-9.136,0.898-13.662,0.52c-0.513,13.682-1.543,27.507-3.097,41.521c-1.553,13.998-3.23,27.586-5.038,40.749
+	c4.52,0,9.396-0.166,14.631-0.496c5.224-0.316,10.292-0.479,15.2-0.479c0.649,1.152,1.285,2.776,1.942,4.838
+	c0.638,2.065,1.22,4.318,1.738,6.779c0.517,2.457,0.997,5.027,1.454,7.753c0.447,2.715,0.873,5.424,1.258,8.135
+	c0.9,6.32,1.681,13.102,2.327,20.336c2.192-6.196,4.454-12.28,6.777-18.209c1.938-5.045,4.004-10.262,6.196-15.699
+	c2.199-5.423,4.327-10.073,6.393-13.936c2.323,0.254,4.649,0.316,6.974,0.188c2.326-0.124,4.681-0.25,7.071-0.392
+	c2.386-0.127,4.775-0.127,7.163,0c2.389,0.142,4.681,0.52,6.88,1.165c-0.257-6.716-0.164-13.619,0.293-20.728
+	c0.449-7.093,1.096-14.204,1.932-21.297c0.841-7.111,1.707-15.14,2.615-22.062c0.907-6.901,1.742-13.27,2.522-21.27H483.002z"/>
+</svg>
diff --git a/GEMINI.md b/GEMINI.md
new file mode 100644
index 000000000..e56c1de12
--- /dev/null
+++ b/GEMINI.md
@@ -0,0 +1,136 @@
+# CRIU (Checkpoint/Restore In User-space)
+
+CRIU is a tool for saving the state of a running application to a set of files
+(checkpointing) and restoring it back to a live state. It is primarily used for
+live migration of containers, in-place updates, and fast application startup.
+
+It is implemented as a command-line tool called `criu`. The two primary commands
+are `dump` and `restore`.
+
+- `dump`: Saves a process tree and all its related resources (file
+  descriptors, IPC, sockets, namespaces, etc.) into a collection of image
+  files.
+- `restore`: Restores processes from image files to the same state they were
+  in before the dump.
+
+## Quick Start
+
+To get a feel for `criu`, you can try checkpointing and restoring a simple
+process.
+
+1.  **Run a simple process:**
+    Open a terminal and run a command that will run for a while. Find its PID.
+    ```bash
+    sleep 1000 &
+    [1] 12345
+    ```
+
+2.  **Dump the process:**
+    As root, use `criu dump` with the process ID (`-t`) and a directory for the
+    image files (`-D`).
+    ```bash
+    sudo criu dump -t 12345 -D /tmp/sleep_images -v4 --shell-job
+    ```
+    The `sleep` process will no longer be running.
+
+3.  **Restore the process:**
+    Use `criu restore` to bring the process back to life from the images.
+    ```bash
+    sudo criu restore -D /tmp/sleep_images -v4 --shell-job
+    ```
+    The `sleep` process will be running again as if nothing happened.
+
+# For Developers and Contributors
+
+This section contains more technical details about CRIU's internals and
+development process.
+
+## Dump Process
+
+On dump, CRIU uses available kernel interfaces to collect information about
+processes. For properties that can only be retrieved from within the process
+itself, CRIU injects a binary blob (called a "parasite") into the process's
+address space and executes it in the context of one of the process's threads.
+This injection is handled by a subproject called **Compel**.
+
+## Restore Process
+
+On restore, CRIU reads the image files to reconstruct the processes. The goal is
+to restore them to the exact state they were in before the dump. The restore
+process is divided into several stages (defined as `CR_STATE_*` in
+`./criu/include/restorer.h`).
+
+The main `criu` process acts as a coordinator. It first restores resources with
+inter-process dependencies (file descriptors, sockets, shared memory,
+namespaces, etc.). It then forks the process tree and sets up namespaces.
+Finally, it restores process-specific resources like file descriptors and memory
+mappings.
+
+A key step involves a small, self-contained binary called the "restorer". All
+restored processes switch to executing this code, which unmaps the CRIU-specific
+memory and restores the application's original memory mappings. On the final
+step, the restorer calls `sigreturn` on a prepared signal frame to resume the
+process with the state it had at the moment of the dump.
+
+## Compel
+
+Compel is a subproject responsible for generating the binary blobs used for the
+parasite code (for dumping) and the restorer code (for restoring). It provides a
+library for injecting and executing this code within the target process's
+address space. It is a separate project because the logic for generating and
+injecting Position-Independent Executable (PIE) code is complex and
+self-contained.
+
+## Coding Style
+
+The C code in the CRIU project follows the
+[Linux Kernel Coding Style](https://www.kernel.org/doc/html/latest/process/coding-style.html).
+Here are some of the main points:
+
+-   **Indentation**: Use tabs, which are set to 8 characters.
+-   **Line Length**: The preferred line limit is 80 characters, but it can be
+    extended to 120 if it improves code readability.
+-   **Braces**:
+    -   The opening brace for a function goes on a new line.
+    -   The opening brace for a block (like `if`, `for`, `while`, `switch`) goes
+        on the same line.
+-   **Spaces**: Use spaces around operators (`+`, `-`, `*`, `/`, `%`, `<`, `>`,
+    `=`, etc.).
+-   **Naming**: Use descriptive names for functions and variables.
+-   **Comments**: Use C-style comments (`/* ... */`). For multi-line comments,
+    the preferred format is:
+    ```c
+    /*
+     * This is a multi-line
+     * comment.
+     */
+    ```
+
+## Code Layout
+
+The code is organized into the following directories:
+
+-   `./compel`: The Compel sub-project.
+-   `./criu`: The main `criu` tool source code.
+-   `./images`: Protobuf descriptions for the image files.
+-   `./test`: All tests.
+-   `./test/zdtm`: The Zero-Downtime Migration (ZDTM) test suite.
+-   `./test/zdtm.py`: The executor script for ZDTM tests.
+-   `./scripts`: Helper scripts.
+-   `./scripts/build`: Docker image files used for CI and cross-compilation
+    checks.
+-   `./crit`: A tool to inspect and manipulate CRIU image files.
+-   `./soccr`: A library for TCP socket checkpoint/restore.
+
+## Tests
+
+The main test suite is ZDTM. Here is an example of how to run a single test:
+
+```bash
+sudo ./test/zdtm.py run -t zdtm/static/env00
+```
+
+Each ZDTM test has three stages: preparation, C/R, and results checks. During
+the test, a process calls `test_daemon()` to signal it is ready for C/R, then
+calls `test_waitsig()` to wait for the C/R stage to complete. After being
+restored, the test checks that all its resources are still in a valid state.
diff --git a/INSTALL.md b/INSTALL.md
index d786d06eb..af0702518 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -1,11 +1,31 @@
+## Building CRIU from source code
+
+First, you need to install compile-time dependencies. Check [Installation dependencies](https://criu.org/Installation#Dependencies) for more info.
+
+To compile CRIU, run:
+```
+make
+```
+This should create the `./criu/criu` executable.
+
+To change the default behaviour of CRIU, the following variables can be passed
+to the make command:
+
+ * **NETWORK_LOCK_DEFAULT**, can be set to one of the following
+   values: `NETWORK_LOCK_IPTABLES`, `NETWORK_LOCK_NFTABLES`,
+   `NETWORK_LOCK_SKIP`. CRIU defaults to `NETWORK_LOCK_IPTABLES`
+   if nothing is specified. If another network locking backend is
+   needed, `make` can be called like this:
+   `make NETWORK_LOCK_DEFAULT=NETWORK_LOCK_NFTABLES`
+
 ## Installing CRIU from source code
 
 Once CRIU is built one can easily setup the complete CRIU package
 (which includes executable itself, CRIT tool, libraries, manual
 and etc) simply typing
-
-    make install
-
+```
+make install
+```
 this command accepts the following variables:
 
  * **DESTDIR**, to specify global root where all components will be placed under (empty by default);
@@ -16,17 +36,17 @@ this command accepts the following variables:
  * **LIBDIR**, to specify directory where to put libraries (guess the correct path  by default).
 
 Thus one can type
-
-    make DESTDIR=/some/new/place install
-
+```
+make DESTDIR=/some/new/place install
+```
 and get everything installed under `/some/new/place`.
 
 ## Uninstalling CRIU
 
 To clean up previously installed CRIU instance one can type
-
-    make uninstall
-
+```
+make uninstall
+```
 and everything should be removed. Note though that if some variable (**DESTDIR**, **BINDIR**
 and such) has been used during installation procedure, the same *must* be passed with
 uninstall action.
diff --git a/MAINTAINERS b/MAINTAINERS
index bb153f1ab..8fee8e571 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4,3 +4,5 @@ Mike Rapoport <rppt@kernel.org>
 Dmitry Safonov <0x7f454c46@gmail.com>
 Adrian Reber <areber@redhat.com>
 Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Radostin Stoyanov <rstoyanov@fedoraproject.org>
+Alexander Mikhalitsyn <alexander@mihalicyn.com>
diff --git a/Makefile b/Makefile
index 436ebfd0d..e26807158 100644
--- a/Makefile
+++ b/Makefile
@@ -19,7 +19,7 @@ endif
 
 #
 # Supported Architectures
-ifneq ($(filter-out x86 arm aarch64 ppc64 s390 mips,$(ARCH)),)
+ifneq ($(filter-out x86 arm aarch64 ppc64 s390 mips loongarch64 riscv64,$(ARCH)),)
         $(error "The architecture $(ARCH) isn't supported")
 endif
 
@@ -35,18 +35,18 @@ ifeq ($(ARCH),arm)
         ARMV		:= $(shell echo $(SUBARCH) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7')
 
         ifeq ($(ARMV),6)
-                USERCFLAGS += -march=armv6
+                ARCHCFLAGS += -march=armv6
         endif
 
         ifeq ($(ARMV),7)
-                USERCFLAGS += -march=armv7-a+fp
+                ARCHCFLAGS += -march=armv7-a+fp
         endif
 
         ifeq ($(ARMV),8)
-                # Running 'setarch linux32 uname -m' returns armv8l on travis aarch64.
+                # Running 'setarch linux32 uname -m' returns armv8l on aarch64.
                 # This tells CRIU to handle armv8l just as armv7hf. Right now this is
                 # only used for compile testing. No further verification of armv8l exists.
-                USERCFLAGS += -march=armv7-a
+                ARCHCFLAGS += -march=armv7-a
                 ARMV := 7
         endif
 
@@ -64,6 +64,8 @@ endif
 
 ifeq ($(ARCH),aarch64)
         DEFINES		:= -DCONFIG_AARCH64
+        CC_MBRANCH_PROT := $(shell $(CC) -c -x c /dev/null -mbranch-protection=none -o /dev/null >/dev/null 2>&1 && echo "-mbranch-protection=none")
+        CFLAGS_PIE	:= $(CC_MBRANCH_PROT)
 endif
 
 ifeq ($(ARCH),ppc64)
@@ -80,6 +82,14 @@ ifeq ($(ARCH),mips)
         DEFINES		:= -DCONFIG_MIPS
 endif
 
+ifeq ($(ARCH),loongarch64)
+        DEFINES		:= -DCONFIG_LOONGARCH64
+endif
+
+ifeq ($(ARCH),riscv64)
+        DEFINES		:= -DCONFIG_RISCV64
+endif
+
 #
 # CFLAGS_PIE:
 #
@@ -102,10 +112,20 @@ export PROTOUFIX DEFINES
 #
 # Independent options for all tools.
 DEFINES			+= -D_FILE_OFFSET_BITS=64
+DEFINES			+= -D_LARGEFILE64_SOURCE
 DEFINES			+= -D_GNU_SOURCE
 
 WARNINGS		:= -Wall -Wformat-security -Wdeclaration-after-statement -Wstrict-prototypes
 
+# -Wdangling-pointer results in a false warning when we add a list element to
+# a local list head variable. It is a false positive because before leaving
+# the function we always check that the local list head variable is empty, thus
+# ensuring that a pointer to it is not dangling anywhere, but gcc can't
+# understand it.
+# Note: There is a similar problem with the kernel list, where this warning is
+# also disabled: https://github.com/torvalds/linux/commit/49beadbd47c2
+WARNINGS		+= -Wno-dangling-pointer -Wno-unknown-warning-option
+
 CFLAGS-GCOV		:= --coverage -fno-exceptions -fno-inline -fprofile-update=atomic
 export CFLAGS-GCOV
 
@@ -113,11 +133,19 @@ ifeq ($(ARCH),mips)
 WARNINGS		:= -rdynamic
 endif
 
+ifeq ($(ARCH),loongarch64)
+WARNINGS		+= -Wno-implicit-function-declaration
+endif
+
 ifneq ($(GCOV),)
         LDFLAGS         += -lgcov
         CFLAGS          += $(CFLAGS-GCOV)
 endif
 
+ifneq ($(NETWORK_LOCK_DEFAULT),)
+	CFLAGS		+= -DNETWORK_LOCK_DEFAULT=$(NETWORK_LOCK_DEFAULT)
+endif
+
 ifeq ($(ASAN),1)
 	CFLAGS-ASAN	:= -fsanitize=address
 	export		CFLAGS-ASAN
@@ -142,12 +170,12 @@ export GMON GMONLDOPT
 endif
 
 AFLAGS			+= -D__ASSEMBLY__
-CFLAGS			+= $(USERCFLAGS) $(WARNINGS) $(DEFINES) -iquote include/
+CFLAGS			+= $(USERCFLAGS) $(ARCHCFLAGS) $(WARNINGS) $(DEFINES) -iquote include/
 HOSTCFLAGS		+= $(WARNINGS) $(DEFINES) -iquote include/
 export AFLAGS CFLAGS USERCLFAGS HOSTCFLAGS
 
 # Default target
-all: criu lib crit
+all: criu lib crit cuda_plugin
 .PHONY: all
 
 #
@@ -250,26 +278,19 @@ criu: $(criu-deps)
 	$(Q) $(MAKE) $(build)=criu all
 .PHONY: criu
 
-crit/Makefile: ;
-crit/%: criu .FORCE
-	$(Q) $(MAKE) $(build)=crit $@
-crit: criu
-	$(Q) $(MAKE) $(build)=crit all
-.PHONY: crit
-
 unittest: $(criu-deps)
 	$(Q) $(MAKE) $(build)=criu unittest
 .PHONY: unittest
 
 
 #
-# Libraries next once crit it ready
+# Libraries next once criu is ready
 # (we might generate headers and such
 # when building criu itself).
 lib/Makefile: ;
-lib/%: crit .FORCE
+lib/%: criu .FORCE
 	$(Q) $(MAKE) $(build)=lib $@
-lib: crit
+lib: criu
 	$(Q) $(MAKE) $(build)=lib all
 .PHONY: lib
 
@@ -278,25 +299,28 @@ clean mrproper:
 	$(Q) $(MAKE) $(build)=criu $@
 	$(Q) $(MAKE) $(build)=soccr $@
 	$(Q) $(MAKE) $(build)=lib $@
+	$(Q) $(MAKE) $(build)=crit $@
 	$(Q) $(MAKE) $(build)=compel $@
 	$(Q) $(MAKE) $(build)=compel/plugins $@
-	$(Q) $(MAKE) $(build)=lib $@
-	$(Q) $(MAKE) $(build)=crit $@
 .PHONY: clean mrproper
 
 clean-amdgpu_plugin:
 	$(Q) $(MAKE) -C plugins/amdgpu clean
 .PHONY: clean-amdgpu_plugin
 
+clean-cuda_plugin:
+	$(Q) $(MAKE) -C plugins/cuda clean
+.PHONY: clean-cuda_plugin
+
 clean-top:
 	$(Q) $(MAKE) -C Documentation clean
 	$(Q) $(MAKE) $(build)=test/compel clean
 	$(Q) $(RM) .gitid
 .PHONY: clean-top
 
-clean: clean-top clean-amdgpu_plugin
+clean: clean-top clean-amdgpu_plugin clean-cuda_plugin
 
-mrproper-top: clean-top clean-amdgpu_plugin
+mrproper-top: clean-top clean-amdgpu_plugin clean-cuda_plugin
 	$(Q) $(RM) $(CONFIG_HEADER)
 	$(Q) $(RM) $(VERSION_HEADER)
 	$(Q) $(RM) $(COMPEL_VERSION_HEADER)
@@ -328,6 +352,14 @@ amdgpu_plugin: criu
 	$(Q) $(MAKE) -C plugins/amdgpu all
 .PHONY: amdgpu_plugin
 
+cuda_plugin: criu
+	$(Q) $(MAKE) -C plugins/cuda all
+.PHONY: cuda_plugin
+
+crit: lib
+	$(Q) $(MAKE) -C crit
+.PHONY: crit
+
 #
 # Generating tar requires tag matched CRIU_VERSION.
 # If not found then simply use GIT's describe with
@@ -393,6 +425,7 @@ help:
 	@echo '    Targets:'
 	@echo '      all             - Build all [*] targets'
 	@echo '    * criu            - Build criu'
+	@echo '    * crit            - Build crit'
 	@echo '      zdtm            - Build zdtm test-suite'
 	@echo '      docs            - Build documentation'
 	@echo '      install         - Install CRIU (see INSTALL.md)'
@@ -409,38 +442,57 @@ help:
 	@echo '      lint            - Run code linters'
 	@echo '      indent          - Indent C code'
 	@echo '      amdgpu_plugin   - Make AMD GPU plugin'
+	@echo '      cuda_plugin     - Make NVIDIA CUDA plugin'
 .PHONY: help
 
-lint:
-	flake8 --version
-	flake8 --config=scripts/flake8.cfg test/zdtm.py
-	flake8 --config=scripts/flake8.cfg test/inhfd/*.py
-	flake8 --config=scripts/flake8.cfg test/others/rpc/config_file.py
-	flake8 --config=scripts/flake8.cfg lib/py/images/pb2dict.py
-	flake8 --config=scripts/flake8.cfg scripts/criu-ns
-	flake8 --config=scripts/flake8.cfg scripts/crit-setup.py
-	flake8 --config=scripts/flake8.cfg coredump/
+ruff:
+	@ruff --version
+	ruff check ${RUFF_FLAGS} --config=scripts/ruff.toml \
+		test/zdtm.py \
+		test/inhfd/*.py \
+		test/others/rpc/config_file.py \
+		test/others/action-script/check_actions.py \
+		test/others/pycriu/*.py \
+		lib/pycriu/criu.py \
+		lib/pycriu/__init__.py \
+		lib/pycriu/images/pb2dict.py \
+		lib/pycriu/images/images.py \
+		scripts/criu-ns \
+		test/others/criu-ns/run.py \
+		crit/*.py \
+		crit/crit/*.py \
+		scripts/uninstall_module.py \
+		coredump/ coredump/coredump \
+		scripts/github-indent-warnings.py
+
+shellcheck:
 	shellcheck --version
 	shellcheck scripts/*.sh
-	shellcheck scripts/ci/*.sh scripts/ci/apt-install
-	shellcheck test/others/crit/*.sh
-	shellcheck test/others/libcriu/*.sh
-	shellcheck test/others/crit/*.sh test/others/criu-coredump/*.sh
-	shellcheck test/others/config-file/*.sh
+	shellcheck scripts/ci/*.sh
+	shellcheck contrib/apt-install contrib/dependencies/*.sh
+	shellcheck -x test/others/crit/*.sh
+	shellcheck -x test/others/libcriu/*.sh
+	shellcheck -x test/others/crit/*.sh test/others/criu-coredump/*.sh
+	shellcheck -x test/others/config-file/*.sh
+	shellcheck -x test/others/action-script/*.sh
+
+codespell:
 	codespell
-	# Do not append \n to pr_perror or fail
-	! git --no-pager grep -E '^\s*\<(pr_perror|fail)\>.*\\n"'
-	# Do not use %m with pr_perror or fail
-	! git --no-pager grep -E '^\s*\<(pr_perror|fail)\>.*%m'
-	# Do not use errno with pr_perror or fail
-	! git --no-pager grep -E '^\s*\<(pr_perror|fail)\>\(".*".*errno'
+
+lint: ruff shellcheck codespell
+	# Do not append \n to pr_perror, pr_pwarn or fail
+	! git --no-pager grep -E '^\s*\<(pr_perror|pr_pwarn|fail)\>.*\\n"'
+	# Do not use %m with pr_* or fail
+	! git --no-pager grep -E '^\s*\<(pr_(err|perror|warn|pwarn|debug|info|msg)|fail)\>.*%m'
+	# Do not use errno with pr_perror, pr_pwarn or fail
+	! git --no-pager grep -E '^\s*\<(pr_perror|pr_pwarn|fail)\>\(".*".*errno'
 	# End pr_(err|warn|msg|info|debug) with \n
 	! git --no-pager grep -En '^\s*\<pr_(err|warn|msg|info|debug)\>.*);$$' | grep -v '\\n'
 	# No EOL whitespace for C files
 	! git --no-pager grep -E '\s+$$' \*.c \*.h
-.PHONY: lint
+.PHONY: lint ruff shellcheck codespell
 
-codecov: SHELL := $(shell which bash)
+codecov: SHELL := $(shell command -v bash)
 codecov:
 	curl -Os https://uploader.codecov.io/latest/linux/codecov
 	chmod +x codecov
@@ -451,8 +503,10 @@ fetch-clang-format: .FORCE
 	$(E) ".clang-format"
 	$(Q) scripts/fetch-clang-format.sh
 
+BASE ?= "HEAD~1"
+OPTS ?= "--quiet"
 indent:
-	find . -name '*.[ch]' -type f -print0 | xargs --null --max-args 128 --max-procs 4 clang-format -i
+	git clang-format --style file --extensions c,h $(OPTS) $(BASE)
 .PHONY: indent
 
 include Makefile.install
diff --git a/Makefile.compel b/Makefile.compel
index 764afadc8..a4209edc5 100644
--- a/Makefile.compel
+++ b/Makefile.compel
@@ -50,8 +50,8 @@ compel/plugins/%: $(compel-deps) .FORCE
 
 #
 # GNU make 4.x supports targets matching via wide
-# match targeting, where GNU make 3.x series (used on
-# Travis) is not, so we have to write them here explicitly.
+# match targeting, where GNU make 3.x series is not,
+# so we have to write them here explicitly.
 compel/plugins/std.lib.a: $(compel-deps) .FORCE
 	$(Q) $(MAKE) $(build)=compel/plugins $@
 
diff --git a/Makefile.config b/Makefile.config
index d46d84f2d..5cf4b8216 100644
--- a/Makefile.config
+++ b/Makefile.config
@@ -2,12 +2,15 @@ include $(__nmk_dir)utils.mk
 include $(__nmk_dir)msg.mk
 include scripts/feature-tests.mak
 
+# This is a kludge for $(info ...) to not eat spaces.
+S :=
+
 ifeq ($(call try-cc,$(FEATURE_TEST_LIBBSD_DEV),-lbsd),true)
         LIBS_FEATURES	+= -lbsd
         FEATURE_DEFINES	+= -DCONFIG_HAS_LIBBSD
 else
-        $(info Note: Building without setproctitle() and strlcpy() support.)
-        $(info $(info)      To enable these features, please install libbsd-devel (RPM) / libbsd-dev (DEB).)
+        $(info Note: Building without setproctitle() support.)
+        $(info $S      Install libbsd-devel (RPM) / libbsd-dev (DEB) to fix.)
 endif
 
 ifeq ($(call pkg-config-check,libselinux),y)
@@ -23,10 +26,10 @@ endif
 
 ifeq ($(call pkg-config-check,libdrm),y)
         export CONFIG_AMDGPU := y
-        $(info Note: Building criu with amdgpu_plugin.)
+        $(info Note: Building with amdgpu_plugin.)
 else
-        $(info Note: Building criu without amdgpu_plugin.)
-        $(info Note: libdrm and libdrm_amdgpu are required to build amdgpu_plugin.)
+        $(info Note: Building without amdgpu_plugin.)
+        $(info $S      Install libdrm-devel (RPM) or libdrm-dev (DEB) to fix.)
 endif
 
 ifeq ($(NO_GNUTLS)x$(call pkg-config-check,gnutls),xy)
@@ -34,7 +37,8 @@ ifeq ($(NO_GNUTLS)x$(call pkg-config-check,gnutls),xy)
         export CONFIG_GNUTLS := y
         FEATURE_DEFINES	+= -DCONFIG_GNUTLS
 else
-        $(info Note: Building without GnuTLS support)
+        $(info Note: Building without GnuTLS support.)
+        $(info $S      Install gnutls-devel (RPM) or gnutls-dev (DEB) to fix.)
 endif
 
 ifeq ($(call pkg-config-check,libnftables),y)
@@ -46,16 +50,19 @@ ifeq ($(call pkg-config-check,libnftables),y)
                 LIBS_FEATURES	+= $(LIB_NFTABLES)
                 FEATURE_DEFINES	+= -DCONFIG_HAS_NFTABLES_LIB_API_1
         else
-                $(warning Warn: you have libnftables installed but it has incompatible API)
-                $(warning Warn: Building without nftables support)
+                $(info Warn: Building without nftables support (incompatible API version).)
         endif
 else
-        $(warning Warn: you have no libnftables installed)
-        $(warning Warn: Building without nftables support)
+        $(info Warn: Building without nftables support.)
+        $(info $S      Install nftables-devel (RPM) or libnftables-dev (DEB) to fix.)
 endif
 
 export LIBS += $(LIBS_FEATURES)
 
+ifneq ($(PLUGINDIR),)
+                FEATURE_DEFINES	+= -DCR_PLUGIN_DEFAULT="\"$(PLUGINDIR)\""
+endif
+
 CONFIG_FILE = .config
 
 $(CONFIG_FILE):
@@ -67,24 +74,26 @@ ifeq ($(call try-asm,$(FEATURE_TEST_X86_COMPAT)),true)
         export CONFIG_COMPAT := y
         FEATURE_DEFINES	+= -DCONFIG_COMPAT
 else
-        $(info Note: Building without ia32 C/R, missed ia32 support in gcc)
-        $(info $(info)      That may be related to missing gcc-multilib in your)
-        $(info $(info)      distribution or you may have Debian with buggy toolchain)
-        $(info $(info)      (issue https://github.com/checkpoint-restore/criu/issues/315))
+        $(info Note: Building without ia32 C/R, missing ia32 support in gcc.)
+        $(info $S      It may be related to missing gcc-multilib in your)
+        $(info $S      distribution, or you may have Debian with buggy toolchain.)
+        $(info $S      See https://github.com/checkpoint-restore/criu/issues/315.)
 endif
 endif
 
 export DEFINES += $(FEATURE_DEFINES)
 export CFLAGS += $(FEATURE_DEFINES)
 
-FEATURES_LIST	:= TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \
-	SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW FSCONFIG MEMFD_CREATE OPENAT2
+FEATURES_LIST	:= TCP_REPAIR PTRACE_PEEKSIGINFO \
+	SETPROCTITLE_INIT TCP_REPAIR_WINDOW MEMFD_CREATE \
+	OPENAT2 NO_LIBC_RSEQ_DEFS
 
 # $1 - config name
 define gen-feature-test
 ifeq ($$(call try-cc,$$(FEATURE_TEST_$(1)),$$(LIBS_FEATURES),$$(DEFINES)),true)
 	$(Q) echo '#define CONFIG_HAS_$(1)' >> $$@
-	$(Q) echo '' >> $$@
+else
+	$(Q) echo '// CONFIG_HAS_$(1) is not set' >> $$@
 endif
 endef
 
diff --git a/Makefile.install b/Makefile.install
index c798637be..70c607ec6 100644
--- a/Makefile.install
+++ b/Makefile.install
@@ -29,6 +29,33 @@ LIBDIR ?= $(PREFIX)/lib
 export PREFIX BINDIR SBINDIR MANDIR RUNDIR
 export LIBDIR INCLUDEDIR LIBEXECDIR PLUGINDIR
 
+# Detect externally managed Python environment (PEP 668).
+PYTHON_EXTERNALLY_MANAGED := $(shell $(PYTHON) -c 'import os, sysconfig; print(int(os.path.isfile(os.path.join(sysconfig.get_path("stdlib"), "EXTERNALLY-MANAGED"))))')
+PIP_BREAK_SYSTEM_PACKAGES ?= 0
+
+# If Python environment is externally managed and PIP_BREAK_SYSTEM_PACKAGES is not set, skip pip install.
+SKIP_PIP_INSTALL := 0
+ifeq ($(PYTHON_EXTERNALLY_MANAGED),1)
+ifeq ($(PIP_BREAK_SYSTEM_PACKAGES),0)
+
+SKIP_PIP_INSTALL := 1
+$(info Warn: Externally managed python environment)
+$(info Consider using PIP_BREAK_SYSTEM_PACKAGES=1)
+
+endif
+endif
+
+# Default flags for pip install:
+# --ignore-installed: Overwrite already installed pycriu/crit packages
+# --no-build-isolation: Use current Python environment to build pycriu/crit packages
+# --no-deps: Don't install any dependencies
+# --no-index: Don't use PyPI index to find packages
+# --progress-bar: Cleaner output
+# --upgrade: Treat the install as an upgrade when replacing the installed version
+PIPFLAGS ?= --ignore-installed --no-build-isolation --no-deps --no-index --progress-bar off --upgrade
+
+export SKIP_PIP_INSTALL PIPFLAGS
+
 install-man:
 	$(Q) $(MAKE) -C Documentation install
 .PHONY: install-man
@@ -37,6 +64,10 @@ install-lib: lib
 	$(Q) $(MAKE) $(build)=lib install
 .PHONY: install-lib
 
+install-crit: lib
+	$(Q) $(MAKE) $(build)=crit install
+.PHONY: install-crit
+
 install-criu: criu
 	$(Q) $(MAKE) $(build)=criu install
 .PHONY: install-criu
@@ -45,19 +76,25 @@ install-amdgpu_plugin: amdgpu_plugin
 	$(Q) $(MAKE) -C plugins/amdgpu install
 .PHONY: install-amdgpu_plugin
 
+install-cuda_plugin: cuda_plugin
+	$(Q) $(MAKE) -C plugins/cuda install
+.PHONY: install-cuda_plugin
+
 install-compel: $(compel-install-targets)
 	$(Q) $(MAKE) $(build)=compel install
 	$(Q) $(MAKE) $(build)=compel/plugins install
 .PHONY: install-compel
 
-install: install-man install-lib install-criu install-compel install-amdgpu_plugin ;
+install: install-man install-lib install-crit install-criu install-compel install-amdgpu_plugin install-cuda_plugin ;
 .PHONY: install
 
 uninstall:
 	$(Q) $(MAKE) -C Documentation $@
 	$(Q) $(MAKE) $(build)=lib $@
+	$(Q) $(MAKE) $(build)=crit $@
 	$(Q) $(MAKE) $(build)=criu $@
 	$(Q) $(MAKE) $(build)=compel $@
 	$(Q) $(MAKE) $(build)=compel/plugins $@
 	$(Q) $(MAKE) -C plugins/amdgpu $@
+	$(Q) $(MAKE) -C plugins/cuda $@
 .PHONY: uninstall
diff --git a/Makefile.versions b/Makefile.versions
index 73bc2d5fa..3e6c9ed22 100644
--- a/Makefile.versions
+++ b/Makefile.versions
@@ -1,10 +1,10 @@
 #
 # CRIU version.
-CRIU_VERSION_MAJOR	:= 3
-CRIU_VERSION_MINOR	:= 17
-CRIU_VERSION_SUBLEVEL	:= 1
+CRIU_VERSION_MAJOR	:= 4
+CRIU_VERSION_MINOR	:= 2
+CRIU_VERSION_SUBLEVEL	:=
 CRIU_VERSION_EXTRA	:=
-CRIU_VERSION_NAME	:= Radiant Redstart
+CRIU_VERSION_NAME	:= CRIUTIBILITY
 CRIU_VERSION		:= $(CRIU_VERSION_MAJOR)$(if $(CRIU_VERSION_MINOR),.$(CRIU_VERSION_MINOR))$(if $(CRIU_VERSION_SUBLEVEL),.$(CRIU_VERSION_SUBLEVEL))$(if $(CRIU_VERSION_EXTRA),.$(CRIU_VERSION_EXTRA))
 
 export CRIU_VERSION_MAJOR CRIU_VERSION_MINOR CRIU_VERSION_SUBLEVEL
diff --git a/README.md b/README.md
index ff4aa1a23..6e2a0de9e 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@
 [![CircleCI](https://circleci.com/gh/checkpoint-restore/criu.svg?style=svg)](
     https://circleci.com/gh/checkpoint-restore/criu)
 
-<p align="center"><img src="https://criu.org/w/images/1/1c/CRIU.svg" width="256px"/></p>
+<p align="center"><img src="Documentation/logo.svg" width="256px"/></p>
 
 ## CRIU -- A project to implement checkpoint/restore functionality for Linux
 
@@ -35,10 +35,10 @@ Pages worth starting with are:
 - [Installation instructions](http://criu.org/Installation)
 - [A simple example of usage](http://criu.org/Simple_loop)
 - [Examples of more advanced usage](https://criu.org/Category:HOWTO)
-- Troubleshooting can be hard, some help can be found [here](https://criu.org/When_C/R_fails), [here](https://criu.org/What_cannot_be_checkpointed) and [here](https://criu.org/FAQ)
+- Troubleshooting can be hard, some help can be found [here](https://criu.org/When_C/R_fails), [here](https://criu.org/What_cannot_be_checkpointed) and [here](https://criu.org/index.php?title=FAQ)
 
 ### Checkpoint and restore of simple loop process
-[<p align="center"><img src="https://asciinema.org/a/232445.png" width="572px" height="412px"/></p>](https://asciinema.org/a/232445)
+<p align="center"><a href="https://asciinema.org/a/232445"><img src="https://asciinema.org/a/232445.png" width="572px" height="412px"/></a></p>
 
 ## Advanced features
 
diff --git a/compel/.gitignore b/compel/.gitignore
index eab3337d6..5e770a86c 100644
--- a/compel/.gitignore
+++ b/compel/.gitignore
@@ -4,6 +4,9 @@ arch/arm/plugins/std/syscalls/syscalls.S
 arch/aarch64/plugins/std/syscalls/syscalls.S
 arch/s390/plugins/std/syscalls/syscalls.S
 arch/ppc64/plugins/std/syscalls/syscalls.S
+arch/mips/plugins/std/syscalls/syscalls-64.S
+arch/loongarch64/plugins/std/syscalls/syscalls-64.S
+arch/riscv64/plugins/std/syscalls/syscalls.S
 include/version.h
 plugins/include/uapi/std/asm/syscall-types.h
 plugins/include/uapi/std/syscall-64.h
diff --git a/compel/Makefile b/compel/Makefile
index b79aee687..c0b8a82a0 100644
--- a/compel/Makefile
+++ b/compel/Makefile
@@ -32,8 +32,8 @@ ifeq ($(ARCH),x86)
 lib-y			+= arch/$(ARCH)/src/lib/thread_area.o
 endif
 
-# handle_elf() has no support of ELF relocations on ARM (yet?)
-ifneq ($(filter arm aarch64,$(ARCH)),)
+# handle_elf() has no support of ELF relocations on ARM, LoongArch64 and RISCV64 (yet?)
+ifneq ($(filter arm aarch64 loongarch64 riscv64,$(ARCH)),)
 CFLAGS			+= -DNO_RELOCS
 HOSTCFLAGS		+= -DNO_RELOCS
 endif
diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/breakpoints.h b/compel/arch/aarch64/src/lib/include/uapi/asm/breakpoints.h
index 5f090490d..8a61b268f 100644
--- a/compel/arch/aarch64/src/lib/include/uapi/asm/breakpoints.h
+++ b/compel/arch/aarch64/src/lib/include/uapi/asm/breakpoints.h
@@ -2,14 +2,41 @@
 #define __COMPEL_BREAKPOINTS_H__
 #define ARCH_SI_TRAP TRAP_BRKPT
 
-static inline int ptrace_set_breakpoint(pid_t pid, void *addr)
-{
-	return 0;
-}
+#include <sys/types.h>
+#include <stdbool.h>
 
-static inline int ptrace_flush_breakpoints(pid_t pid)
-{
-	return 0;
-}
+struct hwbp_cap {
+	char arch;
+	char bp_count;
+};
+
+/* copied from `linux/arch/arm64/include/asm/hw_breakpoint.h` */
+/* Lengths */
+#define ARM_BREAKPOINT_LEN_1 0x1
+#define ARM_BREAKPOINT_LEN_2 0x3
+#define ARM_BREAKPOINT_LEN_3 0x7
+#define ARM_BREAKPOINT_LEN_4 0xf
+#define ARM_BREAKPOINT_LEN_5 0x1f
+#define ARM_BREAKPOINT_LEN_6 0x3f
+#define ARM_BREAKPOINT_LEN_7 0x7f
+#define ARM_BREAKPOINT_LEN_8 0xff
+
+/* Privilege Levels */
+#define AARCH64_BREAKPOINT_EL1 1
+#define AARCH64_BREAKPOINT_EL0 2
+
+/* Breakpoint */
+#define ARM_BREAKPOINT_EXECUTE 0
+
+/* Watchpoints */
+#define ARM_BREAKPOINT_LOAD	1
+#define ARM_BREAKPOINT_STORE	2
+#define AARCH64_ESR_ACCESS_MASK (1 << 6)
+
+#define DISABLE_HBP 0
+#define ENABLE_HBP  1
+
+int ptrace_set_breakpoint(pid_t pid, void *addr);
+int ptrace_flush_breakpoints(pid_t pid);
 
 #endif
diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/gcs-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/gcs-types.h
new file mode 100644
index 000000000..9f9655e3b
--- /dev/null
+++ b/compel/arch/aarch64/src/lib/include/uapi/asm/gcs-types.h
@@ -0,0 +1,47 @@
+#ifndef __UAPI_ASM_GCS_TYPES_H__
+#define __UAPI_ASM_GCS_TYPES_H__
+
+#ifndef NT_ARM_GCS
+#define NT_ARM_GCS 0x410 /* ARM GCS state */
+#endif
+
+/* Shadow Stack/Guarded Control Stack interface */
+#define PR_GET_SHADOW_STACK_STATUS	74
+#define PR_SET_SHADOW_STACK_STATUS	75
+#define PR_LOCK_SHADOW_STACK_STATUS	76
+
+/* When set PR_SHADOW_STACK_ENABLE flag allocates a Guarded Control Stack */
+#ifndef PR_SHADOW_STACK_ENABLE
+#define PR_SHADOW_STACK_ENABLE		(1UL << 0)
+#endif
+
+/* Allows explicit GCS stores (eg. using GCSSTR) */
+#ifndef PR_SHADOW_STACK_WRITE
+#define PR_SHADOW_STACK_WRITE		(1UL << 1)
+#endif
+
+/* Allows explicit GCS pushes (eg. using GCSPUSHM) */
+#ifndef PR_SHADOW_STACK_PUSH
+#define PR_SHADOW_STACK_PUSH		(1UL << 2)
+#endif
+
+#ifndef SHADOW_STACK_SET_TOKEN
+#define SHADOW_STACK_SET_TOKEN 0x1     /* Set up a restore token in the shadow stack */
+#endif
+
+#define PR_SHADOW_STACK_ALL_MODES /* union of every GCS mode bit defined above */ \
+	(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH)
+
+/* copied from: arch/arm64/include/asm/sysreg.h (macro arguments are parenthesized here) */
+#define GCS_CAP_VALID_TOKEN 0x1
+#define GCS_CAP_ADDR_MASK 0xFFFFFFFFFFFFF000ULL
+#define GCS_CAP(x) ((((unsigned long)(x)) & GCS_CAP_ADDR_MASK) | GCS_CAP_VALID_TOKEN)
+#define GCS_SIGNAL_CAP(addr) (((unsigned long)(addr)) & GCS_CAP_ADDR_MASK)
+
+#include <asm/hwcap.h>
+
+#ifndef HWCAP_GCS
+#define HWCAP_GCS (1UL << 32)
+#endif
+
+#endif /* __UAPI_ASM_GCS_TYPES_H__ */
\ No newline at end of file
diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h
index 9d4ce7e2e..606c92ffe 100644
--- a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h
+++ b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h
@@ -2,6 +2,7 @@
 #define UAPI_COMPEL_ASM_TYPES_H__
 
 #include <stdint.h>
+#include <stdbool.h>
 #include <signal.h>
 #include <sys/mman.h>
 #include <asm/ptrace.h>
@@ -16,7 +17,24 @@
  */
 
 typedef struct user_pt_regs user_regs_struct_t;
-typedef struct user_fpsimd_state user_fpregs_struct_t;
+
+/*
+ * GCS (Guarded Control Stack)
+ *
+ * This mirrors the kernel definition but renamed to cr_user_gcs
+ * to avoid conflict with kernel headers (/usr/include/asm/ptrace.h).
+ */
+struct cr_user_gcs {
+	__u64 features_enabled;
+	__u64 features_locked;
+	__u64 gcspr_el0;
+};
+
+struct user_fpregs_struct {
+	struct user_fpsimd_state fpstate;
+	struct cr_user_gcs gcs;
+};
+typedef struct user_fpregs_struct user_fpregs_struct_t;
 
 #define __compel_arch_fetch_thread_area(tid, th) 0
 #define compel_arch_fetch_thread_area(tctl)	 0
@@ -39,4 +57,12 @@ typedef struct user_fpsimd_state user_fpregs_struct_t;
 		__NR_##syscall; \
 	})
 
+extern bool __compel_host_supports_gcs(void);
+#define compel_host_supports_gcs __compel_host_supports_gcs
+
+struct parasite_ctl;
+extern int __parasite_setup_shstk(struct parasite_ctl *ctl,
+				  user_fpregs_struct_t *ext_regs);
+#define parasite_setup_shstk __parasite_setup_shstk
+
 #endif /* UAPI_COMPEL_ASM_TYPES_H__ */
diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h
index f8ec55d6c..7efee528f 100644
--- a/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h
+++ b/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h
@@ -1,24 +1,34 @@
 #ifndef UAPI_COMPEL_ASM_SIGFRAME_H__
 #define UAPI_COMPEL_ASM_SIGFRAME_H__
 
-#include <asm/sigcontext.h>
+#include <signal.h>
 #include <sys/ucontext.h>
 
 #include <stdint.h>
+#include <asm/types.h>
 
 /* Copied from the kernel header arch/arm64/include/uapi/asm/sigcontext.h */
 
 #define FPSIMD_MAGIC 0x46508001
+#define GCS_MAGIC    0x47435300
 
 typedef struct fpsimd_context fpu_state_t;
 
+struct gcs_context {
+	struct _aarch64_ctx head;
+	__u64 gcspr;
+	__u64 features_enabled;
+	__u64 reserved;
+};
+
 struct aux_context {
 	struct fpsimd_context fpsimd;
+	struct gcs_context gcs;
 	/* additional context to be added before "end" */
 	struct _aarch64_ctx end;
 };
 
-// XXX: the idetifier rt_sigcontext is expected to be struct by the CRIU code
+// XXX: the identifier rt_sigcontext is expected to be struct by the CRIU code
 #define rt_sigcontext sigcontext
 
 #include <compel/sigframe-common.h>
@@ -62,6 +72,7 @@ struct cr_sigcontext {
 #define RT_SIGFRAME_AUX_CONTEXT(rt_sigframe) ((struct aux_context *)&(RT_SIGFRAME_SIGCONTEXT(rt_sigframe)->__reserved))
 #define RT_SIGFRAME_FPU(rt_sigframe)	     (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->fpsimd)
 #define RT_SIGFRAME_OFFSET(rt_sigframe)	     0
+#define RT_SIGFRAME_GCS(rt_sigframe)	     (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->gcs)
 
 #define rt_sigframe_erase_sigset(sigframe)	memset(&sigframe->uc.uc_sigmask, 0, sizeof(k_rtsigset_t))
 #define rt_sigframe_copy_sigset(sigframe, from) memcpy(&sigframe->uc.uc_sigmask, from, sizeof(k_rtsigset_t))
diff --git a/compel/arch/aarch64/src/lib/infect.c b/compel/arch/aarch64/src/lib/infect.c
index bd1ed0da3..42f593c79 100644
--- a/compel/arch/aarch64/src/lib/infect.c
+++ b/compel/arch/aarch64/src/lib/infect.c
@@ -2,7 +2,9 @@
 #include <sys/ptrace.h>
 #include <sys/types.h>
 #include <sys/uio.h>
-#include <linux/elf.h>
+#include <sys/auxv.h>
+#include <asm/ptrace.h>
+
 #include <compel/plugins/std/syscall-codes.h>
 #include "common/page.h"
 #include "uapi/compel/asm/infect-types.h"
@@ -10,6 +12,9 @@
 #include "errno.h"
 #include "infect.h"
 #include "infect-priv.h"
+#include "asm/breakpoints.h"
+#include "asm/gcs-types.h"
+#include <linux/prctl.h>
 
 unsigned __page_size = 0;
 unsigned __page_shift = 0;
@@ -30,24 +35,54 @@ static inline void __always_unused __check_code_syscall(void)
 	BUILD_BUG_ON(!is_log2(sizeof(code_syscall)));
 }
 
+bool __compel_host_supports_gcs(void) /* true when AT_HWCAP advertises HWCAP_GCS */
+{
+	unsigned long hwcap = getauxval(AT_HWCAP); /* hwcap bits of the calling process */
+	return (hwcap & HWCAP_GCS) != 0;
+}
+
+static bool __compel_gcs_enabled(struct cr_user_gcs *gcs) /* gcs may be NULL */
+{
+	if (!compel_host_supports_gcs()) /* host check first: without GCS the state is ignored */
+		return false;
+
+	return gcs && (gcs->features_enabled & PR_SHADOW_STACK_ENABLE) != 0; /* ENABLE bit set in the given state */
+}
+
 int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
 {
 	struct fpsimd_context *fpsimd = RT_SIGFRAME_FPU(sigframe);
+	struct gcs_context *gcs = RT_SIGFRAME_GCS(sigframe);
 
 	memcpy(sigframe->uc.uc_mcontext.regs, regs->regs, sizeof(regs->regs));
 
+	pr_debug("sigreturn_prep_regs_plain: sp %lx pc %lx\n", (long)regs->sp, (long)regs->pc);
+
 	sigframe->uc.uc_mcontext.sp = regs->sp;
 	sigframe->uc.uc_mcontext.pc = regs->pc;
 	sigframe->uc.uc_mcontext.pstate = regs->pstate;
 
-	memcpy(fpsimd->vregs, fpregs->vregs, 32 * sizeof(__uint128_t));
+	memcpy(fpsimd->vregs, fpregs->fpstate.vregs, 32 * sizeof(__uint128_t));
 
-	fpsimd->fpsr = fpregs->fpsr;
-	fpsimd->fpcr = fpregs->fpcr;
+	fpsimd->fpsr = fpregs->fpstate.fpsr;
+	fpsimd->fpcr = fpregs->fpstate.fpcr;
 
 	fpsimd->head.magic = FPSIMD_MAGIC;
 	fpsimd->head.size = sizeof(*fpsimd);
 
+	if (__compel_gcs_enabled(&fpregs->gcs)) {
+		gcs->head.magic = GCS_MAGIC;
+		gcs->head.size = sizeof(*gcs);
+		gcs->reserved = 0;
+		gcs->gcspr = fpregs->gcs.gcspr_el0 - 8;
+		gcs->features_enabled = fpregs->gcs.features_enabled;
+
+		pr_debug("sigframe gcspr=%llx features_enabled=%llx\n", fpregs->gcs.gcspr_el0 - 8, fpregs->gcs.features_enabled);
+	} else {
+		pr_debug("sigframe gcspr=[disabled]\n");
+		memset(gcs, 0, sizeof(*gcs));
+	}
+
 	return 0;
 }
 
@@ -59,7 +94,6 @@ int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigfr
 int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
 			 void *arg, __maybe_unused unsigned long flags)
 {
-	user_fpregs_struct_t tmp, *fpsimd = ext_regs ? ext_regs : &tmp;
 	struct iovec iov;
 	int ret;
 
@@ -72,14 +106,28 @@ int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct
 		goto err;
 	}
 
-	iov.iov_base = fpsimd;
-	iov.iov_len = sizeof(*fpsimd);
+	iov.iov_base = &ext_regs->fpstate;
+	iov.iov_len = sizeof(ext_regs->fpstate);
 	if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov))) {
 		pr_perror("Failed to obtain FPU registers for %d", pid);
 		goto err;
 	}
 
-	ret = save(arg, regs, fpsimd);
+	memset(&ext_regs->gcs, 0, sizeof(ext_regs->gcs));
+
+	iov.iov_base = &ext_regs->gcs;
+	iov.iov_len = sizeof(ext_regs->gcs);
+	if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &iov) == 0) {
+		pr_info("gcs: GCSPR_EL0 for %d: 0x%llx, features: 0x%llx\n",
+			pid, ext_regs->gcs.gcspr_el0, ext_regs->gcs.features_enabled);
+
+		if (!__compel_gcs_enabled(&ext_regs->gcs))
+			pr_info("gcs: GCS is NOT enabled\n");
+	} else {
+		pr_info("gcs: GCS state not available for %d\n", pid);
+	}
+
+	ret = save(pid, arg, regs, ext_regs);
 err:
 	return ret;
 }
@@ -88,14 +136,44 @@ int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
 {
 	struct iovec iov;
 
+	struct cr_user_gcs gcs;
+	struct iovec gcs_iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) };
+
 	pr_info("Restoring GP/FPU registers for %d\n", pid);
 
-	iov.iov_base = ext_regs;
-	iov.iov_len = sizeof(*ext_regs);
+	iov.iov_base = &ext_regs->fpstate;
+	iov.iov_len = sizeof(ext_regs->fpstate);
 	if (ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov)) {
 		pr_perror("Failed to set FPU registers for %d", pid);
 		return -1;
 	}
+
+	if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) < 0) {
+		pr_warn("gcs: Failed to get GCS for %d\n", pid);
+	} else {
+		ext_regs->gcs = gcs;
+		compel_set_task_gcs_regs(pid, ext_regs);
+	}
+
+	return 0;
+}
+
+int compel_set_task_gcs_regs(pid_t pid, user_fpregs_struct_t *ext_regs) /* 0 on success, -1 on ptrace failure */
+{
+	struct iovec iov;
+
+	pr_info("gcs: restoring GCS registers for %d\n", pid);
+	pr_info("gcs: restoring GCS: gcspr=%llx features=%llx\n",
+		ext_regs->gcs.gcspr_el0, ext_regs->gcs.features_enabled);
+
+	iov.iov_base = &ext_regs->gcs; /* only the gcs member is written, not the FP state */
+	iov.iov_len = sizeof(ext_regs->gcs);
+
+	if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_GCS, &iov)) {
+		pr_perror("gcs: Failed to set GCS registers for %d", pid);
+		return -1;
+	}
+
 	return 0;
 }
 
@@ -176,3 +254,176 @@ unsigned long compel_task_size(void)
 			break;
 	return task_size;
 }
+
+static struct hwbp_cap *ptrace_get_hwbp_cap(pid_t pid) /* probes once, then serves the cached answer */
+{
+	static struct hwbp_cap info;
+	static int available = -1; /* -1: not probed yet, 0: unavailable, 1: info is valid */
+
+	if (available == -1) {
+		unsigned int val; /* receives the first 32 bits of the NT_ARM_HW_BREAK regset */
+		struct iovec iovec = {
+			.iov_base = &val,
+			.iov_len = sizeof(val),
+		};
+
+		if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_HW_BREAK, &iovec) < 0)
+			available = 0;
+		else {
+			info.arch = (char)((val >> 8) & 0xff); /* bits 15:8 */
+			info.bp_count = (char)(val & 0xff); /* bits 7:0 */
+
+			available = (info.arch != 0); /* arch == 0 is treated as "not available" */
+		}
+	}
+
+	return available == 1 ? &info : NULL; /* NULL when the probe failed or arch was 0 */
+}
+
+int ptrace_set_breakpoint(pid_t pid, void *addr) /* 1: armed and tracee resumed, 0: no hw breakpoint, -1: error */
+{
+	k_rtsigset_t block;
+	struct hwbp_cap *info = ptrace_get_hwbp_cap(pid);
+	struct user_hwdebug_state regs = {};
+	unsigned int ctrl = 0;
+	struct iovec iovec;
+
+	if (info == NULL || info->bp_count == 0)
+		return 0; /* nothing was armed and the tracee was not resumed */
+
+	/*
+	 * The struct is copied from `arch/arm64/include/asm/hw_breakpoint.h` in
+	 * the Linux kernel:
+	 *  struct arch_hw_breakpoint_ctrl {
+	 *  	__u32 __reserved        : 19,
+	 *  	len             : 8,
+	 *  	type            : 2,
+	 *  	privilege       : 2,
+	 *  	enabled         : 1;
+	 *  };
+	 *
+	 * The meaning of the `struct arch_hw_breakpoint_ctrl` bits is defined
+	 * in <<ARM Architecture Reference Manual for A-profile architecture>>,
+	 * D13.3.2 DBGBCR<n>_EL1, Debug Breakpoint Control Registers.
+	 */
+	ctrl = ARM_BREAKPOINT_LEN_4;
+	ctrl = (ctrl << 2) | ARM_BREAKPOINT_EXECUTE;
+	ctrl = (ctrl << 2) | AARCH64_BREAKPOINT_EL0;
+	ctrl = (ctrl << 1) | ENABLE_HBP; /* enabled flag ends up in bit 0 */
+	regs.dbg_regs[0].addr = (__u64)addr;
+	regs.dbg_regs[0].ctrl = ctrl;
+	iovec.iov_base = &regs;
+	iovec.iov_len = (offsetof(struct user_hwdebug_state, dbg_regs) + sizeof(regs.dbg_regs[0])); /* slot 0 only */
+
+	if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_HW_BREAK, &iovec))
+		return -1;
+
+	/*
+	 * FIXME(issues/1429): SIGTRAP can't be blocked, otherwise its handler
+	 * will be reset to the default one.
+	 */
+	ksigfillset(&block);
+	ksigdelset(&block, SIGTRAP); /* mask = every signal except SIGTRAP */
+	if (ptrace(PTRACE_SETSIGMASK, pid, sizeof(k_rtsigset_t), &block)) {
+		pr_perror("Can't block signals for %d", pid);
+		return -1;
+	}
+
+	if (ptrace(PTRACE_CONT, pid, NULL, NULL) != 0) {
+		pr_perror("Unable to restart the  stopped tracee process %d", pid);
+		return -1;
+	}
+
+	return 1;
+}
+
+int ptrace_flush_breakpoints(pid_t pid) /* rewrites breakpoint slot 0 with the enable bit cleared */
+{
+	struct hwbp_cap *info = ptrace_get_hwbp_cap(pid);
+	struct user_hwdebug_state regs = {};
+	unsigned int ctrl = 0;
+	struct iovec iovec;
+
+	if (info == NULL || info->bp_count == 0) /* no hardware breakpoints: nothing to clear */
+		return 0;
+
+	ctrl = ARM_BREAKPOINT_LEN_4;
+	ctrl = (ctrl << 2) | ARM_BREAKPOINT_EXECUTE;
+	ctrl = (ctrl << 2) | AARCH64_BREAKPOINT_EL0;
+	ctrl = (ctrl << 1) | DISABLE_HBP; /* enable flag (bit 0) left clear */
+	regs.dbg_regs[0].addr = 0ul;
+	regs.dbg_regs[0].ctrl = ctrl;
+
+	iovec.iov_base = &regs;
+	iovec.iov_len = (offsetof(struct user_hwdebug_state, dbg_regs) + sizeof(regs.dbg_regs[0])); /* slot 0 only */
+
+	if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_HW_BREAK, &iovec))
+		return -1;
+
+	return 0;
+}
+
+/* Poke a cap token (GCSPR_EL0 - 8) and ctl->parasite_ip (GCSPR_EL0 - 16), then point GCSPR_EL0 at the token. */
+int inject_gcs_cap_token(struct parasite_ctl *ctl, pid_t pid, struct cr_user_gcs *gcs)
+{
+	struct iovec gcs_iov = { .iov_base = gcs, .iov_len = sizeof(*gcs) };
+	uint64_t token_addr = gcs->gcspr_el0 - 8;
+	uint64_t sigtramp_addr = gcs->gcspr_el0 - 16;
+
+	uint64_t cap_token = ALIGN_DOWN(GCS_SIGNAL_CAP(token_addr), 8);
+	unsigned long restorer_addr;
+
+	pr_info("gcs: (setup) CAP token: 0x%lx at addr: 0x%lx\n", cap_token, token_addr);
+
+	/* Inject capability token at gcspr_el0 - 8 */
+	if (ptrace(PTRACE_POKEDATA, pid, (void *)token_addr, cap_token)) {
+		pr_perror("gcs: (setup) Inject GCS cap token failed");
+		return -1;
+	}
+
+	/* Inject restorer trampoline address (gcspr_el0 - 16) */
+	restorer_addr = ctl->parasite_ip;
+	if (ptrace(PTRACE_POKEDATA, pid, (void *)sigtramp_addr, restorer_addr)) {
+		pr_perror("gcs: (setup) Inject GCS restorer failed");
+		return -1;
+	}
+
+	/* Update GCSPR_EL0 in the tracee (and in the caller's copy) so it points at the token */
+	gcs->gcspr_el0 = token_addr;
+	if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_GCS, &gcs_iov)) {
+		pr_perror("gcs: (setup) PTRACE_SETREGSET NT_ARM_GCS failed");
+		return -1;
+	}
+
+	pr_debug("gcs: parasite_ip=%#lx sp=%#llx gcspr_el0=%#llx\n",
+		 ctl->parasite_ip, ctl->orig.regs.sp, gcs->gcspr_el0);
+
+	return 0;
+}
+
+int parasite_setup_shstk(struct parasite_ctl *ctl, user_fpregs_struct_t *ext_regs) /* ext_regs is not read here */
+{
+	struct cr_user_gcs gcs;
+	struct iovec gcs_iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) };
+	pid_t pid = ctl->rpid;
+
+	if(!__compel_host_supports_gcs()) /* host has no GCS: nothing to prepare, report success */
+		return 0;
+
+	if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) != 0) { /* fetch the tracee's current GCS state */
+		pr_perror("GCS state not available for %d", pid);
+		return -1;
+	}
+
+	if (!__compel_gcs_enabled(&gcs)) /* task runs without GCS: leave it untouched */
+		return 0;
+
+	if (inject_gcs_cap_token(ctl, pid, &gcs)) { /* pokes two words below GCSPR_EL0 and moves GCSPR_EL0 */
+		pr_perror("Failed to inject GCS cap token for %d", pid);
+		return -1;
+	}
+
+	pr_info("gcs: GCS enabled for %d\n", pid);
+
+	return 0;
+}
diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def
index 8bcc3cc50..f4deb02b2 100644
--- a/compel/arch/arm/plugins/std/syscalls/syscall.def
+++ b/compel/arch/arm/plugins/std/syscalls/syscall.def
@@ -39,7 +39,7 @@ recvfrom			207	292	(int sockfd, void *ubuf, size_t size, unsigned int flags, str
 sendmsg				211	296	(int sockfd, const struct msghdr *msg, int flags)
 recvmsg				212	297	(int sockfd, struct msghdr *msg, int flags)
 shutdown			210	293	(int sockfd, int how)
-bind				235	282	(int sockfd, const struct sockaddr *addr, int addrlen)
+bind				200	282	(int sockfd, const struct sockaddr *addr, int addrlen)
 setsockopt			208	294	(int sockfd, int level, int optname, const void *optval, socklen_t optlen)
 getsockopt			209	295	(int sockfd, int level, int optname, const void *optval, socklen_t *optlen)
 clone				220	120	(unsigned long flags, void *child_stack, void *parent_tid, unsigned long newtls, void *child_tid)
@@ -85,7 +85,7 @@ timer_settime			110	258	(kernel_timer_t timer_id, int flags, const struct itimer
 timer_gettime			108	259	(int timer_id, const struct itimerspec *setting)
 timer_getoverrun		109	260	(int timer_id)
 timer_delete			111	261	(kernel_timer_t timer_id)
-clock_gettime			113	263	(const clockid_t which_clock, const struct timespec *tp)
+clock_gettime			113	263	(clockid_t which_clock, struct timespec *tp)
 exit_group			94	248	(int error_code)
 set_robust_list			99	338	(struct robust_list_head *head, size_t len)
 get_robust_list			100	339	(int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
@@ -118,7 +118,10 @@ fsopen				430	430	(char *fsname, unsigned int flags)
 fsconfig			431	431	(int fd, unsigned int cmd, const char *key, const char *value, int aux)
 fsmount				432	432	(int fd, unsigned int flags, unsigned int attr_flags)
 clone3				435	435	(struct clone_args *uargs, size_t size)
+close_range			436	436	(unsigned int fd, unsigned int max_fd, unsigned int flags)
 pidfd_open			434	434	(pid_t pid, unsigned int flags)
 openat2				437	437	(int dirfd, char *pathname, struct open_how *how, size_t size)
 pidfd_getfd			438	438	(int pidfd, int targetfd, unsigned int flags)
 rseq				293	398	(void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+membarrier 			283	389	(int cmd, unsigned int flags, int cpu_id)
+map_shadow_stack		453	!	(unsigned long addr, unsigned long size, unsigned int flags)
\ No newline at end of file
diff --git a/compel/arch/arm/src/lib/infect.c b/compel/arch/arm/src/lib/infect.c
index 7700f52ca..a9fb639e2 100644
--- a/compel/arch/arm/src/lib/infect.c
+++ b/compel/arch/arm/src/lib/infect.c
@@ -65,10 +65,9 @@ int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigfr
 }
 
 #define PTRACE_GETVFPREGS 27
-int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
+int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *vfp, save_regs_t save,
 			 void *arg, __maybe_unused unsigned long flags)
 {
-	user_fpregs_struct_t tmp, *vfp = ext_regs ? ext_regs : &tmp;
 	int ret = -1;
 
 	pr_info("Dumping GP/FPU registers for %d\n", pid);
@@ -95,7 +94,7 @@ int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct
 		}
 	}
 
-	ret = save(arg, regs, vfp);
+	ret = save(pid, arg, regs, vfp);
 err:
 	return ret;
 }
diff --git a/compel/arch/loongarch64/plugins/include/asm/prologue.h b/compel/arch/loongarch64/plugins/include/asm/prologue.h
new file mode 100644
index 000000000..c19ce54d7
--- /dev/null
+++ b/compel/arch/loongarch64/plugins/include/asm/prologue.h
@@ -0,0 +1,35 @@
+#ifndef __ASM_PROLOGUE_H__
+#define __ASM_PROLOGUE_H__
+
+#ifndef __ASSEMBLY__
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include <errno.h>
+
+#define sys_recv(sockfd, ubuf, size, flags) sys_recvfrom(sockfd, ubuf, size, flags, NULL, NULL)
+
+typedef struct prologue_init_args {
+	struct sockaddr_un ctl_sock_addr;
+	unsigned int ctl_sock_addr_len;
+
+	unsigned int arg_s;
+	void *arg_p;
+
+	void *sigframe;
+} prologue_init_args_t;
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Reserve enough space for sigframe.
+ *
+ * FIXME: this should rather be taken from the sigframe header.
+ */
+#define PROLOGUE_SGFRAME_SIZE 4096
+
+#define PROLOGUE_INIT_ARGS_SIZE 1024
+
+#endif /* __ASM_PROLOGUE_H__ */
diff --git a/compel/arch/loongarch64/plugins/include/asm/syscall-types.h b/compel/arch/loongarch64/plugins/include/asm/syscall-types.h
new file mode 100644
index 000000000..b883bd8be
--- /dev/null
+++ b/compel/arch/loongarch64/plugins/include/asm/syscall-types.h
@@ -0,0 +1,30 @@
+#ifndef COMPEL_ARCH_SYSCALL_TYPES_H__
+#define COMPEL_ARCH_SYSCALL_TYPES_H__
+
+#include <common/asm/bitsperlong.h>
+/* Types for sigaction, sigprocmask syscalls */
+typedef void rt_signalfn_t(int, siginfo_t *, void *);
+typedef rt_signalfn_t *rt_sighandler_t;
+
+typedef void rt_restorefn_t(void);
+typedef rt_restorefn_t *rt_sigrestore_t;
+
+/* refer to arch/loongarch/include/uapi/asm/signal.h */
+#define _KNSIG	     64
+#define _NSIG_BPW    BITS_PER_LONG
+#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW)
+
+typedef struct {
+	uint64_t sig[_KNSIG_WORDS];
+} k_rtsigset_t;
+
+typedef struct {
+	rt_sighandler_t rt_sa_handler;
+	unsigned long rt_sa_flags;
+	rt_sigrestore_t rt_sa_restorer;
+	k_rtsigset_t rt_sa_mask;
+} rt_sigaction_t;
+
+#define SA_RESTORER 0x04000000
+
+#endif /* COMPEL_ARCH_SYSCALL_TYPES_H__ */
diff --git a/compel/arch/loongarch64/plugins/include/features.h b/compel/arch/loongarch64/plugins/include/features.h
new file mode 100644
index 000000000..b4a3cded2
--- /dev/null
+++ b/compel/arch/loongarch64/plugins/include/features.h
@@ -0,0 +1,4 @@
+#ifndef __COMPEL_ARCH_FEATURES_H
+#define __COMPEL_ARCH_FEATURES_H
+
+#endif /* __COMPEL_ARCH_FEATURES_H */
diff --git a/compel/arch/loongarch64/plugins/std/parasite-head.S b/compel/arch/loongarch64/plugins/std/parasite-head.S
new file mode 100644
index 000000000..3a960490e
--- /dev/null
+++ b/compel/arch/loongarch64/plugins/std/parasite-head.S
@@ -0,0 +1,9 @@
+
+#include "common/asm/linkage.h"
+
+	.section .head.text, "ax"
+ENTRY(__export_parasite_head_start)
+	bl	parasite_service;
+	break 0;
+END(__export_parasite_head_start)
+
diff --git a/compel/arch/loongarch64/plugins/std/syscalls/Makefile.syscalls b/compel/arch/loongarch64/plugins/std/syscalls/Makefile.syscalls
new file mode 100644
index 000000000..0d08f34e1
--- /dev/null
+++ b/compel/arch/loongarch64/plugins/std/syscalls/Makefile.syscalls
@@ -0,0 +1,117 @@
+std-lib-y		+= ./$(PLUGIN_ARCH_DIR)/std/syscalls-64.o
+sys-proto-types		:= $(obj)/include/uapi/std/syscall-types.h
+sys-proto-generic	:= $(obj)/include/uapi/std/syscall.h
+sys-codes-generic	:= $(obj)/include/uapi/std/syscall-codes.h
+sys-codes		 = $(obj)/include/uapi/std/syscall-codes-$(1).h
+sys-proto		 = $(obj)/include/uapi/std/syscall-$(1).h
+sys-def			 = $(PLUGIN_ARCH_DIR)/std/syscalls/syscall_$(1).tbl
+sys-asm			 = $(PLUGIN_ARCH_DIR)/std/syscalls-$(1).S
+sys-asm-common-name	 = std/syscalls/syscall-common-loongarch-$(1).S
+sys-asm-common		 = $(PLUGIN_ARCH_DIR)/$(sys-asm-common-name)
+sys-asm-types		:= $(obj)/include/uapi/std/asm/syscall-types.h
+sys-exec-tbl		 = $(PLUGIN_ARCH_DIR)/std/sys-exec-tbl-$(1).c
+
+sys-bits		:= 64
+
+AV			:= $$$$
+
+define gen-rule-sys-codes
+$(sys-codes): $(sys-def) $(sys-proto-types)
+	$(call msg-gen, $$@)
+	$(Q) echo "/* Autogenerated, don't edit */"					>  $$@
+	$(Q) echo "#ifndef ASM_SYSCALL_CODES_H_$(1)__"					>> $$@
+	$(Q) echo "#define ASM_SYSCALL_CODES_H_$(1)__"					>> $$@
+	$(Q) cat $$< | awk '/^__NR/{SYSN=$(AV)1;					\
+		sub("^__NR", "SYS", SYSN);						\
+		print "\n#ifndef ", $(AV)1;						\
+		print "#define", $(AV)1, $(AV)2;					\
+		print "#endif";								\
+		print "\n#ifndef ", SYSN;						\
+		print "#define ", SYSN, $(AV)1;						\
+		print "#endif";}'							>> $$@
+	$(Q) echo "#endif /* ASM_SYSCALL_CODES_H_$(1)__ */"				>> $$@
+endef
+
+define gen-rule-sys-proto
+$(sys-proto): $(sys-def) $(sys-proto-types)
+	$(call msg-gen, $$@)
+	$(Q) echo "/* Autogenerated, don't edit */"					>  $$@
+	$(Q) echo "#ifndef ASM_SYSCALL_PROTO_H_$(1)__"					>> $$@
+	$(Q) echo "#define ASM_SYSCALL_PROTO_H_$(1)__"					>> $$@
+	$(Q) echo '#include <compel/plugins/std/syscall-codes-$(1).h>'			>> $$@
+	$(Q) echo '#include <compel/plugins/std/syscall-types.h>'			>> $$@
+ifeq ($(1),32)
+	$(Q) echo '#include "asm/syscall32.h"'						>> $$@
+endif
+	$(Q) cat $$< | awk '/^__NR/{print "extern long", $(AV)3,			\
+			substr($(AV)0, index($(AV)0,$(AV)4)), ";"}'			>> $$@
+	$(Q) echo "#endif /* ASM_SYSCALL_PROTO_H_$(1)__ */"				>> $$@
+endef
+
+define gen-rule-sys-asm
+$(sys-asm): $(sys-def) $(sys-asm-common) $(sys-codes) $(sys-proto) $(sys-proto-types)
+	$(call msg-gen, $$@)
+	$(Q) echo "/* Autogenerated, don't edit */"					>  $$@
+	$(Q) echo '#include <compel/plugins/std/syscall-codes-$(1).h>'			>> $$@
+	$(Q) echo '#include "$(sys-asm-common-name)"'					>> $$@
+	$(Q) cat $$< | awk '/^__NR/{print "SYSCALL(", $(AV)3, ",", $(AV)2, ")"}'	>> $$@
+endef
+
+define gen-rule-sys-exec-tbl
+$(sys-exec-tbl): $(sys-def) $(sys-codes) $(sys-proto) $(sys-proto-generic) $(sys-proto-types)
+	$(call msg-gen, $$@)
+	$(Q) echo "/* Autogenerated, don't edit */"					>  $$@
+	$(Q) cat $$< | awk '/^__NR/{print						\
+		"SYSCALL(", substr($(AV)3, 5), ",", $(AV)2, ")"}'			>> $$@
+endef
+
+$(sys-codes-generic): $(sys-proto-types)
+	$(call msg-gen, $@)
+	$(Q) echo "/* Autogenerated, don't edit */"			>  $@
+	$(Q) echo "#ifndef __ASM_CR_SYSCALL_CODES_H__"			>> $@
+	$(Q) echo "#define __ASM_CR_SYSCALL_CODES_H__"			>> $@
+	$(Q) echo '#include <compel/plugins/std/syscall-codes-64.h>'	>> $@
+	$(Q) cat $< | awk '/^__NR/{NR32=$$1;				\
+		sub("^__NR", "__NR32", NR32);				\
+		print "\n#ifndef ", NR32;				\
+		print "#define ", NR32, $$2;				\
+		print "#endif";}'					>> $@
+	$(Q) echo "#endif /* __ASM_CR_SYSCALL_CODES_H__ */"		>> $@
+mrproper-y += $(sys-codes-generic)
+
+$(sys-proto-generic): $(strip $(call map,sys-proto,$(sys-bits))) $(sys-proto-types)
+	$(call msg-gen, $@)
+	$(Q) echo "/* Autogenerated, don't edit */"			>  $@
+	$(Q) echo "#ifndef __ASM_CR_SYSCALL_PROTO_H__"			>> $@
+	$(Q) echo "#define __ASM_CR_SYSCALL_PROTO_H__"			>> $@
+	$(Q) echo ""							>> $@
+	$(Q) echo '#include <compel/plugins/std/syscall-64.h>'		>> $@
+	$(Q) echo ""							>> $@
+	$(Q) echo "#endif /* __ASM_CR_SYSCALL_PROTO_H__ */"		>> $@
+mrproper-y += $(sys-proto-generic)
+
+define gen-rule-sys-exec-tbl
+$(sys-exec-tbl): $(sys-def) $(sys-codes) $(sys-proto) $(sys-proto-generic)
+	$(call msg-gen, $$@)
+	$(Q) echo "/* Autogenerated, don't edit */"			>  $$@
+	$(Q) cat $$< | awk '/^__NR/{print				\
+		"SYSCALL(", substr($(AV)3, 5), ",", $(AV)2, ")"}'	>> $$@
+endef
+
+$(eval $(call map,gen-rule-sys-codes,$(sys-bits)))
+$(eval $(call map,gen-rule-sys-proto,$(sys-bits)))
+$(eval $(call map,gen-rule-sys-asm,$(sys-bits)))
+$(eval $(call map,gen-rule-sys-exec-tbl,$(sys-bits)))
+
+$(sys-asm-types): $(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h
+	$(call msg-gen, $@)
+	$(Q) ln -s ../../../../../../$(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h $(sys-asm-types)
+
+std-headers-deps	+= $(call sys-codes,$(sys-bits))
+std-headers-deps	+= $(call sys-proto,$(sys-bits))
+std-headers-deps	+= $(call sys-asm,$(sys-bits))
+std-headers-deps	+= $(call sys-exec-tbl,$(sys-bits))
+std-headers-deps	+= $(sys-codes-generic)
+std-headers-deps	+= $(sys-proto-generic)
+std-headers-deps	+= $(sys-asm-types)
+mrproper-y		+= $(std-headers-deps)
diff --git a/compel/arch/loongarch64/plugins/std/syscalls/syscall-common-loongarch-64.S b/compel/arch/loongarch64/plugins/std/syscalls/syscall-common-loongarch-64.S
new file mode 100644
index 000000000..fff894466
--- /dev/null
+++ b/compel/arch/loongarch64/plugins/std/syscalls/syscall-common-loongarch-64.S
@@ -0,0 +1,44 @@
+#include "common/asm/linkage.h"
+
+#define SYSCALL(name, opcode)		\
+ENTRY(name);					\
+	addi.d	$a7, $zero, opcode;	\
+	syscall 0;					\
+	jirl	$r0, $r1, 0;		\
+END(name)
+
+#ifndef AT_FDCWD
+#define AT_FDCWD	-100
+#endif
+
+#ifndef AT_REMOVEDIR
+#define AT_REMOVEDIR	0x200
+#endif
+
+ENTRY(sys_open)
+	or		$a3, $zero, $a2
+	or		$a2, $zero, $a1
+	or		$a1, $zero, $a0
+	addi.d	$a0, $zero, AT_FDCWD
+	b		sys_openat
+END(sys_open)
+
+ENTRY(sys_mkdir)
+	or		$a3, $zero, $a2
+	or		$a2, $zero, $a1
+	or		$a1, $zero, $a0
+	addi.d	$a0, $zero, AT_FDCWD
+	b		sys_mkdirat
+END(sys_mkdir)
+
+ENTRY(sys_rmdir)
+	addi.d	$a2, $zero, AT_REMOVEDIR
+	or		$a1, $zero, $a0
+	addi.d	$a0, $zero, AT_FDCWD
+	b		sys_unlinkat
+END(sys_rmdir)
+
+ENTRY(__cr_restore_rt)
+	addi.d	$a7, $zero, __NR_rt_sigreturn
+	syscall	0
+END(__cr_restore_rt)
diff --git a/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl b/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl
new file mode 100644
index 000000000..83dcdab4a
--- /dev/null
+++ b/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl
@@ -0,0 +1,122 @@
+#
+# System call table. Please make sure the table consists only of the syscalls
+# actually used somewhere in the project.
+# Numbers follow the generic table in include/uapi/asm-generic/unistd.h, which LoongArch uses.
+#
+# __NR_name			code		name			arguments
+# -------------------------------------------------------------------------------------------------------------------------------------------------------------
+__NR_io_setup			0	sys_io_setup		(unsigned nr_events, aio_context_t *ctx)
+__NR_io_submit			2	sys_io_submit		(aio_context_t ctx, long nr, struct iocb **iocbpp)
+__NR_io_getevents		4	sys_io_getevents	(aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo)
+__NR_fcntl			25	sys_fcntl		(int fd, int type, long arg)
+__NR_ioctl			29	sys_ioctl		(unsigned int fd, unsigned int cmd, unsigned long arg)
+__NR_flock			32	sys_flock		(int fd, unsigned long cmd)
+__NR_mkdirat			34	sys_mkdirat             (int dfd, const char *pathname, int flag)
+__NR_unlinkat			35	sys_unlinkat            (int dfd, const char *pathname, int flag)
+__NR_umount2			39	sys_umount2		(char *name, int flags)
+__NR_mount			40	sys_mount		(char *dev_name, char *dir_name, char *type, unsigned long flags, void *data)
+__NR_fallocate			47	sys_fallocate		(int fd, int mode, loff_t offset, loff_t len)
+__NR_close			57	sys_close		(int fd)
+__NR_openat			56	sys_openat		(int dfd, const char *filename, int flags, int mode)
+__NR_lseek			62	sys_lseek		(int fd, unsigned long offset, unsigned long origin)
+__NR_read			63	sys_read		(int fd, void *buf, unsigned long count)
+__NR_write			64	sys_write		(int fd, const void *buf, unsigned long count)
+__NR_pread64			67	sys_pread		(unsigned int fd, char *buf, size_t count, loff_t pos)
+__NR_preadv			69	sys_preadv_raw		(int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h)
+__NR_ppoll			73	sys_ppoll		(struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize)
+__NR_signalfd4			74	sys_signalfd		(int fd, k_rtsigset_t *mask, size_t sizemask, int flags)
+__NR_vmsplice			75	sys_vmsplice		(int fd, const struct iovec *iov, unsigned long nr_segs, unsigned int flags)
+__NR_readlinkat			78	sys_readlinkat		(int fd, const char *path, char *buf, int bufsize)
+__NR_timerfd_settime		86	sys_timerfd_settime	(int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr)
+__NR_capget			90	sys_capget		(struct cap_header *h, struct cap_data *d)
+__NR_capset			91	sys_capset		(struct cap_header *h, struct cap_data *d)
+__NR_personality		92	sys_personality		(unsigned int personality)
+__NR_exit			93	sys_exit		(unsigned long error_code)
+__NR_exit_group			94	sys_exit_group		(int error_code)
+__NR_waitid			95	sys_waitid		(int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
+__NR_set_tid_address		96	sys_set_tid_address	(int *tid_addr)
+__NR_futex			98	sys_futex		(uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3)
+__NR_set_robust_list		99	sys_set_robust_list	(struct robust_list_head *head, size_t len)
+__NR_get_robust_list		100	sys_get_robust_list	(int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
+__NR_nanosleep			101	sys_nanosleep		(struct timespec *req, struct timespec *rem)
+__NR_getitimer			102	sys_getitimer		(int which, const struct itimerval *val)
+__NR_setitimer			103	sys_setitimer		(int which, const struct itimerval *val, struct itimerval *old)
+__NR_sys_timer_create		107	sys_timer_create	(clockid_t which_clock, struct sigevent *timer_event_spec, kernel_timer_t *created_timer_id)
+__NR_sys_timer_gettime		108	sys_timer_gettime	(int timer_id, const struct itimerspec *setting)
+__NR_sys_timer_getoverrun	109	sys_timer_getoverrun	(int timer_id)
+__NR_sys_timer_settime		110	sys_timer_settime	(kernel_timer_t timer_id, int flags, const struct itimerspec *new_setting, struct itimerspec *old_setting)
+__NR_sys_timer_delete		111	sys_timer_delete	(kernel_timer_t timer_id)
+__NR_clock_gettime		113	sys_clock_gettime	(clockid_t which_clock, struct timespec *tp)
+__NR_sched_setscheduler		119	sys_sched_setscheduler	(int pid, int policy, struct sched_param *p)
+__NR_restart_syscall		128	sys_restart_syscall	(void)
+__NR_kill			129	sys_kill		(long pid, int sig)
+__NR_sigaltstack		132	sys_sigaltstack		(const void *uss, void *uoss)
+__NR_rt_sigaction		134	sys_sigaction		(int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact, size_t sigsetsize)
+__NR_rt_sigprocmask		135	sys_sigprocmask		(int how, k_rtsigset_t *set, k_rtsigset_t *old, size_t sigsetsize)
+__NR_rt_sigqueueinfo		138	sys_rt_sigqueueinfo	(pid_t pid, int sig, siginfo_t *info)
+__NR_rt_sigreturn		139	sys_rt_sigreturn	(void)
+__NR_setpriority		140	sys_setpriority		(int which, int who, int nice)
+__NR_setresuid			147	sys_setresuid		(int uid, int euid, int suid)
+__NR_getresuid			148	sys_getresuid		(int *uid, int *euid, int *suid)
+__NR_setresgid			149	sys_setresgid		(int gid, int egid, int sgid)
+__NR_getresgid			150	sys_getresgid		(int *gid, int *egid, int *sgid)
+__NR_getpgid			155	sys_getpgid		(pid_t pid)
+__NR_setfsuid			151	sys_setfsuid		(int fsuid)
+__NR_setfsgid			152	sys_setfsgid		(int fsgid)
+__NR_getsid			156	sys_getsid		(void)
+__NR_getgroups			158	sys_getgroups		(int gsize, unsigned int *groups)
+__NR_setgroups			159	sys_setgroups		(int gsize, unsigned int *groups)
+__NR_setrlimit			164	sys_setrlimit		(int resource, struct krlimit *rlim)
+__NR_umask			166	sys_umask		(int mask)
+__NR_prctl			167	sys_prctl		(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5)
+__NR_gettimeofday		169	sys_gettimeofday	(struct timeval *tv, struct timezone *tz)
+__NR_getpid			172	sys_getpid		(void)
+__NR_ptrace			117	sys_ptrace		(long request, pid_t pid, void *addr, void *data)
+__NR_gettid			178	sys_gettid		(void)
+__NR_shmat			196	sys_shmat		(int shmid, void *shmaddr, int shmflag)
+__NR_socket			198	sys_socket		(int domain, int type, int protocol)
+__NR_bind			200	sys_bind		(int sockfd, const struct sockaddr *addr, int addrlen)
+__NR_connect			203	sys_connect		(int sockfd, struct sockaddr *addr, int addrlen)
+__NR_sendto			206	sys_sendto		(int sockfd, void *buff, size_t len, unsigned int flags, struct sockaddr *addr, int addr_len)
+__NR_recvfrom			207	sys_recvfrom		(int sockfd, void *ubuf, size_t size, unsigned int flags, struct sockaddr *addr, int *addr_len)
+__NR_setsockopt			208	sys_setsockopt		(int sockfd, int level, int optname, const void *optval, socklen_t optlen)
+__NR_getsockopt			209	sys_getsockopt		(int sockfd, int level, int optname, const void *optval, socklen_t *optlen)
+__NR_shutdown			210	sys_shutdown		(int sockfd, int how)
+__NR_sendmsg			211	sys_sendmsg		(int sockfd, const struct msghdr *msg, int flags)
+__NR_recvmsg			212	sys_recvmsg		(int sockfd, struct msghdr *msg, int flags)
+__NR_brk			214	sys_brk			(void *addr)
+__NR_munmap			215	sys_munmap		(void *addr, unsigned long len)
+__NR_mremap			216	sys_mremap		(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr)
+__NR_clone			220	sys_clone		(unsigned long flags, void *child_stack, void *parent_tid, unsigned long newtls, void *child_tid)
+__NR_mmap			222	sys_mmap		(void *addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long offset)
+__NR_mprotect			226	sys_mprotect		(const void *addr, unsigned long len, unsigned long prot)
+__NR_mincore			232	sys_mincore		(void *addr, unsigned long size, unsigned char *vec)
+__NR_madvise			233	sys_madvise		(unsigned long start, size_t len, int behavior)
+__NR_rt_tgsigqueueinfo		240	sys_rt_tgsigqueueinfo	(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
+__NR_wait4			260	sys_wait4		(int pid, int *status, int options, struct rusage *ru)
+__NR_fanotify_init		262	sys_fanotify_init	(unsigned int flags, unsigned int event_f_flags)
+__NR_fanotify_mark		263	sys_fanotify_mark	(int fanotify_fd, unsigned int flags, uint64_t mask, int dfd, const char *pathname)
+__NR_open_by_handle_at		265	sys_open_by_handle_at	(int mountdirfd, struct file_handle *handle, int flags)
+__NR_setns			268	sys_setns		(int fd, int nstype)
+__NR_kcmp			272	sys_kcmp		(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
+__NR_seccomp			277	sys_seccomp		(unsigned int op, unsigned int flags, const char *uargs)
+__NR_memfd_create		279	sys_memfd_create	(const char *name, unsigned int flags)
+__NR_userfaultfd		282	sys_userfaultfd		(int flags)
+__NR_membarrier			283	sys_membarrier		(int cmd, unsigned int flags, int cpu_id)
+__NR_rseq			293	sys_rseq		(void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+__NR_open_tree			428	sys_open_tree		(int dirfd, const char *pathname, unsigned int flags)
+__NR_move_mount			429	sys_move_mount		(int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, int flags)
+__NR_fsopen			430	sys_fsopen		(char *fsname, unsigned int flags)
+__NR_fsconfig			431	sys_fsconfig		(int fd, unsigned int cmd, const char *key, const char *value, int aux)
+__NR_fsmount			432	sys_fsmount		(int fd, unsigned int flags, unsigned int attr_flags)
+__NR_pidfd_open			434	sys_pidfd_open		(pid_t pid, unsigned int flags)
+__NR_clone3			435	sys_clone3		(struct clone_args *uargs, size_t size)
+__NR_openat2			437	sys_openat2		(int dirfd, char *pathname, struct open_how *how, size_t size)
+__NR_pidfd_getfd		438	sys_pidfd_getfd		(int pidfd, int targetfd, unsigned int flags)
+#__NR_dup2			!	sys_dup2		(int oldfd, int newfd)
+#__NR_rmdir			!	sys_rmdir		(const char *name)
+#__NR_unlink			!	sys_unlink		(char *pathname)
+#__NR_cacheflush		!	sys_cacheflush		(char *addr, int nbytes, int cache)
+#__NR_set_thread_area		!	sys_set_thread_area	(unsigned long *addr)
+#__NR_mkdir			!	sys_mkdir		(const char *name, int mode)
+#__NR_open			!	sys_open		(const char *filename, unsigned long flags, unsigned long mode)
diff --git a/compel/arch/loongarch64/scripts/compel-pack.lds.S b/compel/arch/loongarch64/scripts/compel-pack.lds.S
new file mode 100644
index 000000000..cfb7a2fb3
--- /dev/null
+++ b/compel/arch/loongarch64/scripts/compel-pack.lds.S
@@ -0,0 +1,32 @@
+/*
+ * Linker script that packs an object into a single ".crblob" output section
+ * placed at address 0.
+ *
+ * EXTERN() marks __export_parasite_head_start as an undefined symbol for the
+ * link, and the ASSERT inside the section makes the link fail with a clear
+ * message when that symbol is not defined.
+ */
+OUTPUT_ARCH(loongarch)
+EXTERN(__export_parasite_head_start)
+
+SECTIONS
+{
+	/*
+	 * .head.text goes first, followed by text, data, rodata, bss, got and
+	 * toc input sections; the location counter is aligned to 32 bytes after
+	 * each group. Gaps are filled with zeroes (=0x00000000).
+	 */
+	.crblob 0x0 : {
+		*(.head.text)
+		ASSERT(DEFINED(__export_parasite_head_start),
+			"Symbol __export_parasite_head_start is missing");
+		*(.text*)
+		. = ALIGN(32);
+		*(.data*)
+		. = ALIGN(32);
+		*(.rodata*)
+		. = ALIGN(32);
+		*(.bss*)
+		. = ALIGN(32);
+		*(.got*)
+		. = ALIGN(32);
+		*(.toc*)
+		. = ALIGN(32);
+	} =0x00000000,
+
+	/* Everything not placed above (debug info, notes, ...) is dropped. */
+	/DISCARD/ : {
+		*(.debug*)
+		*(.comment*)
+		*(.note*)
+		*(.group*)
+		*(.eh_frame*)
+		*(*)
+	}
+}
diff --git a/compel/arch/loongarch64/src/lib/cpu.c b/compel/arch/loongarch64/src/lib/cpu.c
new file mode 100644
index 000000000..172b90e27
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/cpu.c
@@ -0,0 +1,41 @@
+#include <string.h>
+#include <stdbool.h>
+
+#include "compel-cpu.h"
+#include "common/bitops.h"
+#include "common/compiler.h"
+#include "log.h"
+
+#undef LOG_PREFIX
+#define LOG_PREFIX "cpu: "
+
+/* CPU info cached by compel_cpu_has_feature() and the flag saying it is valid. */
+static compel_cpuinfo_t rt_info;
+static bool rt_info_done = false;
+
+/* Stub: does nothing, @c and @feature are ignored. */
+void compel_set_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
+{
+}
+
+/* Stub: does nothing, @c and @feature are ignored. */
+void compel_clear_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
+{
+}
+
+/* Stub: always returns 0, i.e. no feature is ever reported as present. */
+int compel_test_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
+{
+	return 0;
+}
+
+/* Stub: leaves @c untouched and always returns 0. */
+int compel_cpuid(compel_cpuinfo_t *c)
+{
+	return 0;
+}
+
+/*
+ * Report whether the running CPU has @feature.
+ *
+ * CPU info is gathered through compel_cpuid() on the first call only and
+ * kept in rt_info; every call then asks compel_test_cpu_cap() about that
+ * cached data.
+ */
+bool compel_cpu_has_feature(unsigned int feature)
+{
+	if (rt_info_done)
+		return compel_test_cpu_cap(&rt_info, feature);
+
+	/* First call: fill the cache before answering. */
+	compel_cpuid(&rt_info);
+	rt_info_done = true;
+	return compel_test_cpu_cap(&rt_info, feature);
+}
diff --git a/compel/arch/loongarch64/src/lib/handle-elf-host.c b/compel/arch/loongarch64/src/lib/handle-elf-host.c
new file mode 100644
index 000000000..a605a5a45
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/handle-elf-host.c
@@ -0,0 +1,22 @@
+#include <string.h>
+#include <errno.h>
+
+#include "handle-elf.h"
+#include "piegen.h"
+#include "log.h"
+
+/*
+ * Expected e_ident bytes of an accepted input file: the "\x7fELF" magic,
+ * then 0x02 (64-bit class), 0x01 (little-endian data), 0x01 (ELF version),
+ * with every remaining byte zero.
+ */
+static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = {
+	0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+extern int __handle_elf(void *mem, size_t size);
+
+/*
+ * Entry point for a mapped input file: when its first EI_NIDENT bytes equal
+ * elf_ident_64_le the file is handed to __handle_elf(); otherwise an error
+ * is logged and -EINVAL is returned.
+ */
+int handle_binary(void *mem, size_t size)
+{
+	if (memcmp(mem, elf_ident_64_le, sizeof(elf_ident_64_le)) != 0) {
+		pr_err("Unsupported Elf format detected\n");
+		return -EINVAL;
+	}
+
+	return __handle_elf(mem, size);
+}
diff --git a/compel/arch/loongarch64/src/lib/handle-elf.c b/compel/arch/loongarch64/src/lib/handle-elf.c
new file mode 100644
index 000000000..a605a5a45
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/handle-elf.c
@@ -0,0 +1,22 @@
+#include <string.h>
+#include <errno.h>
+
+#include "handle-elf.h"
+#include "piegen.h"
+#include "log.h"
+
+/*
+ * Expected e_ident bytes of an accepted input file: the "\x7fELF" magic,
+ * then 0x02 (64-bit class), 0x01 (little-endian data), 0x01 (ELF version),
+ * with every remaining byte zero.
+ */
+static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = {
+	0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+extern int __handle_elf(void *mem, size_t size);
+
+/*
+ * Entry point for a mapped input file: when its first EI_NIDENT bytes equal
+ * elf_ident_64_le the file is handed to __handle_elf(); otherwise an error
+ * is logged and -EINVAL is returned.
+ */
+int handle_binary(void *mem, size_t size)
+{
+	if (memcmp(mem, elf_ident_64_le, sizeof(elf_ident_64_le)) != 0) {
+		pr_err("Unsupported Elf format detected\n");
+		return -EINVAL;
+	}
+
+	return __handle_elf(mem, size);
+}
diff --git a/compel/arch/loongarch64/src/lib/include/handle-elf.h b/compel/arch/loongarch64/src/lib/include/handle-elf.h
new file mode 100644
index 000000000..b0a66ef87
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/include/handle-elf.h
@@ -0,0 +1,8 @@
+#ifndef COMPEL_HANDLE_ELF_H__
+#define COMPEL_HANDLE_ELF_H__
+
+#include "elf64-types.h"
+
+/*
+ * True when @e_machine is EM_LOONGARCH. The argument is parenthesised so
+ * that an expression passed in is compared as a whole.
+ */
+#define arch_is_machine_supported(e_machine) ((e_machine) == EM_LOONGARCH)
+
+#endif /* COMPEL_HANDLE_ELF_H__ */
diff --git a/compel/arch/loongarch64/src/lib/include/syscall.h b/compel/arch/loongarch64/src/lib/include/syscall.h
new file mode 100644
index 000000000..ac3e2799a
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/include/syscall.h
@@ -0,0 +1,8 @@
+#ifndef __COMPEL_SYSCALL_H__
+#define __COMPEL_SYSCALL_H__
+
+/* Fallback used when no previously included header has defined SIGSTKFLT. */
+#ifndef SIGSTKFLT
+#define SIGSTKFLT 16
+#endif
+
+#endif
diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/breakpoints.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/breakpoints.h
new file mode 100644
index 000000000..21eb1309f
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/breakpoints.h
@@ -0,0 +1,6 @@
+#ifndef __COMPEL_BREAKPOINTS_H__
+#define __COMPEL_BREAKPOINTS_H__
+/* si_code value this architecture associates with a trap stop. */
+#define ARCH_SI_TRAP TRAP_BRKPT
+/* Breakpoint helpers for the traced task @pid; implemented in infect.c. */
+extern int ptrace_set_breakpoint(pid_t pid, void *addr);
+extern int ptrace_flush_breakpoints(pid_t pid);
+#endif
diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/cpu.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/cpu.h
new file mode 100644
index 000000000..e568df789
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/cpu.h
@@ -0,0 +1,6 @@
+#ifndef __CR_ASM_CPU_H__
+#define __CR_ASM_CPU_H__
+
+/* Empty placeholder: no CPU information is recorded for this architecture. */
+typedef struct {
+} compel_cpuinfo_t;
+#endif /* __CR_ASM_CPU_H__ */
diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/fpu.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/fpu.h
new file mode 100644
index 000000000..7f476d541
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/fpu.h
@@ -0,0 +1,4 @@
+#ifndef __CR_ASM_FPU_H__
+#define __CR_ASM_FPU_H__
+
+#endif /* __CR_ASM_FPU_H__ */
diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/infect-types.h
new file mode 100644
index 000000000..0b047a5b0
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/infect-types.h
@@ -0,0 +1,67 @@
+#ifndef UAPI_COMPEL_ASM_TYPES_H__
+#define UAPI_COMPEL_ASM_TYPES_H__
+
+#include <stdint.h>
+
+#define SIGMAX	   64
+#define SIGMAX_OLD 31
+
+/*
+ * From the Linux kernel header arch/loongarch/include/uapi/asm/ptrace.h
+ *
+ * A thread LoongArch CPU context
+ *
+ * struct user_fp_state {
+ *     uint64_t    fpr[32];
+ *     uint64_t    fcc;
+ *     uint32_t    fcsr;
+ * };
+ *
+ * struct user_pt_regs {
+ *     unsigned long regs[32];
+ *     unsigned long orig_a0;
+ *     unsigned long csr_era;
+ *     unsigned long csr_badv;
+ *     unsigned long reserved[10];
+ * };
+ */
+
+/*
+ * General-purpose register set as exchanged with ptrace (see the
+ * NT_PRSTATUS request in compel_get_task_regs()).
+ */
+struct user_gp_regs {
+	uint64_t regs[32]; /* the 32 general-purpose registers */
+	uint64_t orig_a0; /* used to restore regs[4] when a syscall is restarted */
+	uint64_t pc; /* program counter; presumably the kernel's csr_era -- confirm */
+	uint64_t csr_badv;
+	uint64_t reserved[10];
+} __attribute__((aligned(8)));
+
+/*
+ * Floating-point register set; same fields as the user_fp_state layout
+ * quoted at the top of this header.
+ */
+struct user_fp_regs {
+	uint64_t regs[32];
+	uint64_t fcc;
+	uint32_t fcsr;
+};
+
+/* Names under which the arch-independent compel code refers to the sets. */
+typedef struct user_gp_regs user_regs_struct_t;
+typedef struct user_fp_regs user_fpregs_struct_t;
+
+/* Every register set is treated as native. */
+#define user_regs_native(regs) true
+
+/*
+ * No thread-area/TLS handling here: the fetch macros evaluate to 0 and the
+ * get_tls macros expand to nothing.
+ */
+#define __compel_arch_fetch_thread_area(tid, th) 0
+#define compel_arch_fetch_thread_area(tctl)	 0
+#define compel_arch_get_tls_task(ctl, tls)
+#define compel_arch_get_tls_thread(tctl, tls)
+
+/*
+ * Accessors into user_regs_struct_t: result in regs[4], stack pointer in
+ * regs[3], syscall number in regs[11], instruction pointer in pc.
+ */
+#define REG_RES(r)	   ((uint64_t)(r).regs[4])
+#define REG_IP(r)	   ((uint64_t)(r).pc)
+#define REG_SP(r)	   ((uint64_t)(r).regs[3])
+#define REG_SYSCALL_NR(r)  ((uint64_t)(r).regs[11])
+#define SET_REG_IP(r, val) ((r).pc = (val))
+
+/* Number of general-purpose and floating-point registers. */
+#define GPR_NUM 32
+#define FPR_NUM 32
+
+/*
+ * Yield the number of @syscall (__NR_<syscall>). @compat is evaluated and
+ * discarded; it has no influence on the result.
+ */
+#define __NR(syscall, compat)   \
+	({                      \
+		(void)compat;   \
+		__NR_##syscall; \
+	})
+
+#endif /* UAPI_COMPEL_ASM_TYPES_H__ */
diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/sigframe.h
new file mode 100644
index 000000000..fcb545a1d
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/sigframe.h
@@ -0,0 +1,86 @@
+#ifndef UAPI_COMPEL_ASM_SIGFRAME_H__
+#define UAPI_COMPEL_ASM_SIGFRAME_H__
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include <compel/asm/fpu.h>
+#include <compel/plugins/std/syscall-codes.h>
+
+#include <asm/types.h>
+
+#define rt_sigcontext sigcontext
+/* sigcontext defined in usr/include/uapi/asm/sigcontext.h*/
+#include <compel/sigframe-common.h>
+typedef __u32 u32;
+
+/*
+ * Machine context stored in the signal frame: program counter, the 32
+ * general-purpose registers and a flags word, followed by a 16-byte aligned
+ * variable-length area for extended contexts (see RT_SIGFRAME_FPU()).
+ */
+typedef struct sigcontext_t {
+	__u64 pc;
+	__u64 regs[32];
+	__u32 flags;
+	__u64 extcontext[0] __attribute__((__aligned__(16)));
+} sigcontext_t;
+
+/* Header placed in front of an extended context: type magic and size. */
+typedef struct context_info_t {
+	__u32 magic;
+	__u32 size;
+	__u64 padding;
+} context_info_t;
+
+/* Magic identifying an FPU extended context, and its alignment. */
+#define FPU_CTX_MAGIC 0x46505501
+#define FPU_CTX_ALIGN 8
+/* FPU state stored right after its context_info_t header. */
+typedef struct fpu_context_t {
+	__u64 regs[32];
+	__u64 fcc;
+	__u64 fcsr;
+} fpu_context_t;
+
+/*
+ * User context of the signal frame. __unused pads the signal mask so that
+ * uc_sigmask plus padding always occupy 1024 / 8 = 128 bytes.
+ */
+typedef struct ucontext {
+	unsigned long uc_flags;
+	struct ucontext *uc_link;
+	stack_t uc_stack;
+	sigset_t uc_sigmask;
+	__u8 __unused[1024 / 8 - sizeof(sigset_t)];
+	sigcontext_t uc_mcontext;
+} ucontext;
+
+/* Copy from the kernel source arch/loongarch/kernel/signal.c */
+struct rt_sigframe {
+	rt_siginfo_t rs_info; /* signal information */
+	ucontext rs_uc; /* user context, including the machine context */
+};
+
+/*
+ * Accessors into a struct rt_sigframe pointer: the user context, its signal
+ * mask (viewed as k_rtsigset_t), the machine context and the saved program
+ * counter. An FPU context is always considered present.
+ */
+#define RT_SIGFRAME_UC(rt_sigframe)	 (&(rt_sigframe->rs_uc))
+#define RT_SIGFRAME_SIGMASK(rt_sigframe) ((k_rtsigset_t *)&RT_SIGFRAME_UC(rt_sigframe)->uc_sigmask)
+#define RT_SIGFRAME_SIGCTX(rt_sigframe)	 (&(RT_SIGFRAME_UC(rt_sigframe)->uc_mcontext))
+#define RT_SIGFRAME_REGIP(rt_sigframe)	 ((long unsigned int)(RT_SIGFRAME_SIGCTX(rt_sigframe)->pc))
+#define RT_SIGFRAME_HAS_FPU(rt_sigframe) (1)
+
+/*
+ * Yield a pointer to the FPU context stored at the start of extcontext.
+ * Note the side effect: every evaluation (re)writes the context header with
+ * FPU_CTX_MAGIC and the combined size of header and FPU context.
+ */
+#define RT_SIGFRAME_FPU(rt_sigframe)                                                                 \
+	({                                                                                           \
+		context_info_t *ctx = (context_info_t *)RT_SIGFRAME_SIGCTX(rt_sigframe)->extcontext; \
+		ctx->magic = FPU_CTX_MAGIC;                                                          \
+		ctx->size = sizeof(context_info_t) + sizeof(fpu_context_t);                          \
+		(fpu_context_t *)((char *)ctx + sizeof(context_info_t));                             \
+	})
+
+/* No extra offset is applied to the signal frame on this architecture. */
+#define RT_SIGFRAME_OFFSET(rt_sigframe) 0
+
+/*
+ * Switch the stack pointer to @new_sp, load __NR_rt_sigreturn into $a7 and
+ * execute the syscall. The @rt_sigframe argument is not used by the macro.
+ */
+/* clang-format off */
+#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe)  \
+    asm volatile(                               \
+            "addi.d $sp, %0, 0 \n"              \
+            "addi.d $a7, $zero, "__stringify(__NR_rt_sigreturn)"    \n" \
+            "syscall   0"                       \
+            :                                   \
+            :"r"(new_sp)                        \
+            : "$a7", "memory")
+/* clang-format on */
+
+/* Prepare the FPU part of @sigframe; implemented outside this header. */
+int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe);
+
+/* Zero, or overwrite from @from, the k_rtsigset_t-sized signal mask of the frame. */
+#define rt_sigframe_erase_sigset(sigframe)	memset(RT_SIGFRAME_SIGMASK(sigframe), 0, sizeof(k_rtsigset_t))
+#define rt_sigframe_copy_sigset(sigframe, from) memcpy(RT_SIGFRAME_SIGMASK(sigframe), from, sizeof(k_rtsigset_t))
+
+#endif /* UAPI_COMPEL_ASM_SIGFRAME_H__ */
diff --git a/compel/arch/loongarch64/src/lib/infect.c b/compel/arch/loongarch64/src/lib/infect.c
new file mode 100644
index 000000000..190c39227
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/infect.c
@@ -0,0 +1,204 @@
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <errno.h>
+
+#include <compel/asm/fpu.h>
+#include <compel/cpu.h>
+#include "errno.h"
+#include <compel/plugins/std/syscall-codes.h>
+#include <compel/plugins/std/syscall.h>
+#include "common/err.h"
+#include "common/page.h"
+#include "asm/infect-types.h"
+#include "ptrace.h"
+#include "infect.h"
+#include "infect-priv.h"
+#include "log.h"
+#include "common/bug.h"
+
+/*
+ * Injected syscall instruction
+ * loongarch64 is Little Endian
+ *
+ * Two 4-byte instructions stored byte by byte: the syscall itself followed
+ * by a break. The buffer is handed to compel_execute_syscall() by
+ * compel_syscall() below.
+ */
+const char code_syscall[] = {
+	0x00, 0x00, 0x2b, 0x00, /* syscall    */
+	0x00, 0x00, 0x2a, 0x00	/*  break      */
+};
+
+/*
+ * Fill the machine context of @sigframe from the dumped register sets: the
+ * program counter and general-purpose registers come from @regs, the FPU
+ * registers, fcc and fcsr from @fpregs. Always returns 0.
+ */
+int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
+{
+	sigcontext_t *sigctx = RT_SIGFRAME_SIGCTX(sigframe);
+	fpu_context_t *fpu_ctx;
+
+	/* General-purpose state. */
+	sigctx->pc = regs->pc;
+	memcpy(sigctx->regs, regs->regs, sizeof(regs->regs));
+
+	/* RT_SIGFRAME_FPU() also stamps the FPU context header (magic, size). */
+	fpu_ctx = RT_SIGFRAME_FPU(sigframe);
+	fpu_ctx->fcsr = fpregs->fcsr;
+	fpu_ctx->fcc = fpregs->fcc;
+	memcpy(fpu_ctx->regs, fpregs->regs, sizeof(fpregs->regs));
+
+	return 0;
+}
+
+/* Nothing to prepare on this architecture: both frames are left untouched. */
+int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe)
+{
+	return 0;
+}
+
+/*
+ * Read the general-purpose and FPU registers of the traced task @pid, fix
+ * them up for an interrupted syscall and pass them to @save.
+ *
+ * @regs:     receives the general-purpose register set (NT_PRSTATUS)
+ * @ext_regs: receives the FPU register set (NT_PRFPREG); may be NULL, in
+ *            which case a local temporary is used
+ * @save:     callback invoked as save(pid, arg, regs, fpregs)
+ * @arg:      opaque argument forwarded to @save
+ * @flags:    unused on this architecture
+ *
+ * Returns the value returned by @save, or the non-zero ptrace() result when
+ * one of the register sets could not be read.
+ */
+int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
+			 void *arg, __maybe_unused unsigned long flags)
+{
+	user_fpregs_struct_t tmp, *fpregs = ext_regs ? ext_regs : &tmp;
+	struct iovec iov;
+	int ret;
+
+	pr_info("Dumping GP/FPU registers for %d\n", pid);
+
+	iov.iov_base = regs;
+	iov.iov_len = sizeof(user_regs_struct_t);
+	if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov))) {
+		pr_perror("Failed to obtain CPU registers for %d", pid);
+		goto err;
+	}
+
+	/*
+	 * Refer to Linux kernel arch/loongarch/kernel/signal.c
+	 *
+	 * A non-zero regs[0] marks a task stopped inside a syscall. When the
+	 * result in regs[4] is one of the restart codes, rewind the task so
+	 * that the syscall is issued again after restore: put the original
+	 * first argument back and step pc back by one 4-byte instruction.
+	 */
+	if (regs->regs[0]) {
+		switch (regs->regs[4]) {
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			regs->regs[4] = regs->orig_a0;
+			regs->pc -= 4;
+			break;
+		case -ERESTART_RESTARTBLOCK:
+			/* Same rewind, but re-enter through restart_syscall. */
+			regs->regs[4] = regs->orig_a0;
+			regs->regs[11] = __NR_restart_syscall;
+			regs->pc -= 4;
+			break;
+		}
+		regs->regs[0] = 0; /* Don't deal with this again.  */
+	}
+
+	iov.iov_base = fpregs;
+	iov.iov_len = sizeof(user_fpregs_struct_t);
+	if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov))) {
+		pr_perror("Failed to obtain FPU registers for %d", pid);
+		goto err;
+	}
+
+	ret = save(pid, arg, regs, fpregs);
+err:
+	/* Report ptrace()/save() failures instead of unconditionally 0. */
+	return ret;
+}
+
+/*
+ * Write the FPU register set @ext_regs into the traced task @pid through
+ * PTRACE_SETREGSET/NT_PRFPREG. Returns 0 on success, -1 (after logging the
+ * error) when ptrace() fails.
+ */
+int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
+{
+	struct iovec fp_iov = {
+		.iov_base = ext_regs,
+		.iov_len = sizeof(*ext_regs),
+	};
+
+	pr_info("Restoring GP/FPU registers for %d\n", pid);
+
+	if (ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &fp_iov) == 0)
+		return 0;
+
+	pr_perror("Failed to set FPU registers for %d", pid);
+	return -1;
+}
+
+/*
+ * Execute syscall @nr with up to six arguments inside the task described by
+ * @ctl and store its result in *@ret.
+ *
+ * Registers $4 ~ $11 hold a0 ~ a7: the arguments go into a0 ~ a5
+ * (regs[4..9]), the syscall number into a7 (regs[11]) and the result is read
+ * back from a0 (regs[4]). The return value is that of
+ * compel_execute_syscall().
+ */
+int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, unsigned long arg1, unsigned long arg2,
+		   unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6)
+{
+	const unsigned long args[] = { arg1, arg2, arg3, arg4, arg5, arg6 };
+	user_regs_struct_t regs = ctl->orig.regs;
+	unsigned int i;
+	int err;
+
+	regs.regs[11] = (unsigned long)nr;
+	for (i = 0; i < sizeof(args) / sizeof(args[0]); i++)
+		regs.regs[4 + i] = args[i];
+
+	err = compel_execute_syscall(ctl, &regs, code_syscall);
+	*ret = regs.regs[4];
+
+	return err;
+}
+
+/*
+ * Run mmap() inside the task described by @ctl.
+ *
+ * The arguments have the same meaning as for mmap(2). @offset is forwarded
+ * unchanged: the LoongArch mmap syscall takes the file offset in bytes (the
+ * kernel converts it to pages itself), so shifting it here would map the
+ * wrong part of the file for any non-zero offset.
+ *
+ * Returns the address of the new mapping, or NULL when the syscall could not
+ * be executed or returned an error value.
+ */
+void *remote_mmap(struct parasite_ctl *ctl, void *addr, size_t length, int prot, int flags, int fd, off_t offset)
+{
+	long map;
+	int err;
+
+	err = compel_syscall(ctl, __NR_mmap, &map, (unsigned long)addr, length, prot, flags, fd, offset);
+
+	if (err < 0 || IS_ERR_VALUE(map)) {
+		pr_err("remote mmap() failed: %s\n", strerror(-map));
+		return NULL;
+	}
+
+	return (void *)map;
+}
+
+/*
+ * Point @regs at the code to run: pc is set to @new_ip and, when @stack is
+ * not NULL, regs[4] receives the stack address.
+ *
+ * regs must be inited when calling this function from original context.
+ *
+ * NOTE(review): regs[4] is a0 while REG_SP() reads regs[3]; presumably the
+ * entry code picks the stack up from a0 -- confirm against the parasite
+ * head.
+ */
+void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs)
+{
+	if (stack != NULL)
+		regs->regs[4] = (unsigned long)stack;
+
+	regs->pc = new_ip;
+}
+
+/* No architecture-specific restriction: every task is reported as dumpable. */
+bool arch_can_dump_task(struct parasite_ctl *ctl)
+{
+	return true;
+}
+
+/*
+ * Fetch the alternate signal stack of the task described by @ctl by running
+ * sigaltstack(NULL, &s->rs_uc.uc_stack) inside it. Returns the error of the
+ * syscall injection when that fails, otherwise the syscall's own result.
+ */
+int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s)
+{
+	long sas_ret;
+	int err = compel_syscall(ctl, __NR_sigaltstack, &sas_ret, 0, (unsigned long)&s->rs_uc.uc_stack, 0, 0, 0, 0);
+
+	if (err)
+		return err;
+
+	return sas_ret;
+}
+
+/*
+ * TODO: add feature
+ *
+ * Stub: no breakpoint is installed; @pid and @addr are ignored and 0 is
+ * returned.
+ */
+int ptrace_set_breakpoint(pid_t pid, void *addr)
+{
+	return 0;
+}
+
+/* Stub matching ptrace_set_breakpoint(): nothing to flush, returns 0. */
+int ptrace_flush_breakpoints(pid_t pid)
+{
+	return 0;
+}
+
+/*
+ * Refer to Linux kernel arch/loongarch/include/asm/processor.h
+ *
+ * The expansions are fully parenthesised so that the macros keep their value
+ * when used inside a larger expression.
+ */
+#define TASK_SIZE32	((1UL) << 31)
+#define TASK_SIZE64_MIN ((1UL) << 40)
+#define TASK_SIZE64_MAX ((1UL) << 48)
+
+/*
+ * Probe the size of the user address space.
+ *
+ * Candidate sizes start at TASK_SIZE64_MIN and double on every step. For
+ * each one munmap() is called on a single page at that address; the first
+ * candidate for which munmap() reports an error is returned. When no call
+ * fails the loop stops at TASK_SIZE64_MAX and that value is returned.
+ */
+unsigned long compel_task_size(void)
+{
+	unsigned long task_size;
+
+	for (task_size = TASK_SIZE64_MIN; task_size < TASK_SIZE64_MAX; task_size <<= 1)
+		if (munmap((void *)task_size, page_size()))
+			break;
+	return task_size;
+}
diff --git a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl
index 505ec849d..ad3d44634 100644
--- a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl
+++ b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl
@@ -84,7 +84,7 @@ __NR_sys_timer_settime		5217		sys_timer_settime	(kernel_timer_t timer_id, int fl
 __NR_sys_timer_gettime		5218		sys_timer_gettime	(int timer_id, const struct itimerspec *setting)
 __NR_sys_timer_getoverrun	5219		sys_timer_getoverrun	(int timer_id)
 __NR_sys_timer_delete		5220		sys_timer_delete	(kernel_timer_t timer_id)
-__NR_clock_gettime		5222		sys_clock_gettime	(const clockid_t which_clock, const struct timespec *tp)
+__NR_clock_gettime		5222		sys_clock_gettime	(clockid_t which_clock, struct timespec *tp)
 __NR_exit_group			5205		sys_exit_group		(int error_code)
 __NR_set_thread_area		5242		sys_set_thread_area	(unsigned long *addr)
 __NR_openat			5247		sys_openat		(int dfd, const char *filename, int flags, int mode)
@@ -115,7 +115,9 @@ __NR_fsopen			5430		sys_fsopen		(char *fsname, unsigned int flags)
 __NR_fsconfig			5431		sys_fsconfig		(int fd, unsigned int cmd, const char *key, const char *value, int aux)
 __NR_fsmount			5432		sys_fsmount		(int fd, unsigned int flags, unsigned int attr_flags)
 __NR_clone3			5435		sys_clone3		(struct clone_args *uargs, size_t size)
+__NR_close_range		5436		sys_close_range		(unsigned int fd, unsigned int max_fd, unsigned int flags)
 __NR_pidfd_open			5434		sys_pidfd_open		(pid_t pid, unsigned int flags)
 __NR_openat2			5437		sys_openat2		(int dirfd, char *pathname, struct open_how *how, size_t size)
 __NR_pidfd_getfd		5438		sys_pidfd_getfd		(int pidfd, int targetfd, unsigned int flags)
 __NR_rseq		        5327		sys_rseq		(void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+__NR_membarrier 		5318		sys_membarrier		(int cmd, unsigned int flags, int cpu_id)
diff --git a/compel/arch/mips/src/lib/handle-elf.c b/compel/arch/mips/src/lib/handle-elf.c
index a605a5a45..e086761c2 100644
--- a/compel/arch/mips/src/lib/handle-elf.c
+++ b/compel/arch/mips/src/lib/handle-elf.c
@@ -5,18 +5,31 @@
 #include "piegen.h"
 #include "log.h"
 
-static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = {
-	0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-};
-
 extern int __handle_elf(void *mem, size_t size);
 
 int handle_binary(void *mem, size_t size)
 {
-	if (memcmp(mem, elf_ident_64_le, sizeof(elf_ident_64_le)) == 0)
-		return __handle_elf(mem, size);
+	Elf64_Ehdr *ehdr = (Elf64_Ehdr *)mem;
 
-	pr_err("Unsupported Elf format detected\n");
-	return -EINVAL;
+	/* check ELF magic */
+	if (ehdr->e_ident[EI_MAG0] != ELFMAG0 ||
+	    ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
+	    ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
+	    ehdr->e_ident[EI_MAG3] != ELFMAG3) {
+		pr_err("Invalid ELF magic\n");
+		return -EINVAL;
+	}
+
+	/* check ELF class and data encoding */
+	if (ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
+	    ehdr->e_ident[EI_DATA] != ELFDATA2LSB) {
+		pr_err("Unsupported ELF class or data encoding\n");
+		return -EINVAL;
+	}
+
+	if (ehdr->e_ident[EI_ABIVERSION] != 0) {
+		pr_warn("Unusual ABI version: %d\n", ehdr->e_ident[EI_ABIVERSION]);
+	}
+
+	return __handle_elf(mem, size);
 }
diff --git a/compel/arch/mips/src/lib/infect.c b/compel/arch/mips/src/lib/infect.c
index afa0f5ed5..a1d4865cc 100644
--- a/compel/arch/mips/src/lib/infect.c
+++ b/compel/arch/mips/src/lib/infect.c
@@ -119,10 +119,9 @@ int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigfr
 	return 0;
 }
 
-int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
+int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *xs, save_regs_t save,
 			 void *arg, __maybe_unused unsigned long flags)
 {
-	user_fpregs_struct_t xsave = {}, *xs = ext_regs ? ext_regs : &xsave;
 	int ret = -1;
 
 	pr_info("Dumping GP/FPU registers for %d\n", pid);
@@ -150,7 +149,7 @@ int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct
 		regs->regs[0] = 0;
 	}
 
-	ret = save(arg, regs, xs);
+	ret = save(pid, arg, regs, xs);
 	return ret;
 }
 
diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
index af40d7104..3deb41cf7 100644
--- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
+++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
@@ -82,7 +82,7 @@ __NR_sys_timer_settime	241		sys_timer_settime	(kernel_timer_t timer_id, int flag
 __NR_sys_timer_gettime	242		sys_timer_gettime	(int timer_id, const struct itimerspec *setting)
 __NR_sys_timer_getoverrun	243		sys_timer_getoverrun	(int timer_id)
 __NR_sys_timer_delete	244		sys_timer_delete	(kernel_timer_t timer_id)
-__NR_clock_gettime	246		sys_clock_gettime	(const clockid_t which_clock, const struct timespec *tp)
+__NR_clock_gettime	246		sys_clock_gettime	(clockid_t which_clock, struct timespec *tp)
 __NR_exit_group		234		sys_exit_group		(int error_code)
 __NR_waitid		272		sys_waitid		(int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
 __NR_set_robust_list	300		sys_set_robust_list	(struct robust_list_head *head, size_t len)
@@ -114,7 +114,9 @@ __NR_fsopen		430		sys_fsopen		(char *fsname, unsigned int flags)
 __NR_fsconfig		431		sys_fsconfig		(int fd, unsigned int cmd, const char *key, const char *value, int aux)
 __NR_fsmount		432		sys_fsmount		(int fd, unsigned int flags, unsigned int attr_flags)
 __NR_clone3		435		sys_clone3		(struct clone_args *uargs, size_t size)
+__NR_close_range	436		sys_close_range		(unsigned int fd, unsigned int max_fd, unsigned int flags)
 __NR_pidfd_open		434		sys_pidfd_open		(pid_t pid, unsigned int flags)
 __NR_openat2		437		sys_openat2		(int dirfd, char *pathname, struct open_how *how, size_t size)
 __NR_pidfd_getfd	438		sys_pidfd_getfd		(int pidfd, int targetfd, unsigned int flags)
 __NR_rseq       	387		sys_rseq		(void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+__NR_membarrier 	365		sys_membarrier		(int cmd, unsigned int flags, int cpu_id)
diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h
index eb12c9f7c..0c4ccb648 100644
--- a/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h
+++ b/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h
@@ -14,7 +14,7 @@
  */
 #include <signal.h>
 
-// XXX: the idetifier rt_sigcontext is expected to be struct by the CRIU code
+// XXX: the identifier rt_sigcontext is expected to be struct by the CRIU code
 #define rt_sigcontext sigcontext
 
 #include <compel/sigframe-common.h>
@@ -23,6 +23,11 @@
 
 /* Copied from the Linux kernel header arch/powerpc/include/asm/ptrace.h */
 #define USER_REDZONE_SIZE 512
+#if _CALL_ELF != 2
+#error Only supporting ABIv2.
+#else
+#define STACK_FRAME_MIN_SIZE 32
+#endif
 
 /* Copied from the Linux kernel source file arch/powerpc/kernel/signal_64.c */
 #define TRAMP_SIZE 6
diff --git a/compel/arch/ppc64/src/lib/infect.c b/compel/arch/ppc64/src/lib/infect.c
index 61cd6e985..54abd48a4 100644
--- a/compel/arch/ppc64/src/lib/infect.c
+++ b/compel/arch/ppc64/src/lib/infect.c
@@ -11,6 +11,7 @@
 #include "log.h"
 #include "common/bug.h"
 #include "common/page.h"
+#include "common/err.h"
 #include "infect.h"
 #include "infect-priv.h"
 
@@ -303,34 +304,59 @@ out_free:
 	return -1; /* still failing the checkpoint */
 }
 
-static int __get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
-{
-	pr_info("Dumping GP/FPU registers for %d\n", pid);
+/*
+ * This is inspired by kernel function check_syscall_restart in
+ * arch/powerpc/kernel/signal.c
+ */
 
-	/*
-	 * This is inspired by kernel function check_syscall_restart in
-	 * arch/powerpc/kernel/signal.c
-	 */
 #ifndef TRAP
 #define TRAP(r) ((r).trap & ~0xF)
 #endif
 
-	if (TRAP(*regs) == 0x0C00 && regs->ccr & 0x10000000) {
-		/* Restart the system call */
-		switch (regs->gpr[3]) {
-		case ERESTARTNOHAND:
-		case ERESTARTSYS:
-		case ERESTARTNOINTR:
-			regs->gpr[3] = regs->orig_gpr3;
-			regs->nip -= 4;
-			break;
-		case ERESTART_RESTARTBLOCK:
-			pr_warn("Will restore %d with interrupted system call\n", pid);
-			regs->gpr[3] = EINTR;
-			break;
-		}
+static bool trap_is_scv(user_regs_struct_t *regs)
+{
+	return TRAP(*regs) == 0x3000;
+}
+
+static bool trap_is_syscall(user_regs_struct_t *regs)
+{
+	return trap_is_scv(regs) || TRAP(*regs) == 0x0C00;
+}
+
+static void handle_syscall(pid_t pid, user_regs_struct_t *regs)
+{
+	unsigned long ret = regs->gpr[3];
+
+	if (trap_is_scv(regs)) {
+		if (!IS_ERR_VALUE(ret))
+			return;
+		ret = -ret;
+	} else if (!(regs->ccr & 0x10000000)) {
+		return;
 	}
 
+	/* Restart or interrupt the system call */
+	switch (ret) {
+	case ERESTARTNOHAND:
+	case ERESTARTSYS:
+	case ERESTARTNOINTR:
+		regs->gpr[3] = regs->orig_gpr3;
+		regs->nip -= 4;
+		break;
+	case ERESTART_RESTARTBLOCK:
+		pr_warn("Will restore %d with interrupted system call\n", pid);
+		regs->gpr[3] = trap_is_scv(regs) ? -EINTR : EINTR;
+		break;
+	}
+}
+
+static int __get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
+{
+	pr_info("Dumping GP/FPU registers for %d\n", pid);
+
+	if (trap_is_syscall(regs))
+		handle_syscall(pid, regs);
+
 	/* Resetting trap since we are now coming from user space. */
 	regs->trap = 0;
 
@@ -365,17 +391,16 @@ static int __get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_stru
 	return 0;
 }
 
-int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
+int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs, save_regs_t save,
 			 void *arg, __maybe_unused unsigned long flags)
 {
-	user_fpregs_struct_t tmp, *fpregs = ext_regs ? ext_regs : &tmp;
 	int ret;
 
 	ret = __get_task_regs(pid, regs, fpregs);
 	if (ret)
 		return ret;
 
-	return save(arg, regs, fpregs);
+	return save(pid, arg, regs, fpregs);
 }
 
 int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
@@ -441,13 +466,13 @@ void *remote_mmap(struct parasite_ctl *ctl, void *addr, size_t length, int prot,
 void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs)
 {
 	/*
-	 * OpenPOWER ABI requires that r12 is set to the calling function addressi
+	 * OpenPOWER ABI requires that r12 is set to the calling function address
 	 * to compute the TOC pointer.
 	 */
 	regs->gpr[12] = new_ip;
 	regs->nip = new_ip;
 	if (stack)
-		regs->gpr[1] = (unsigned long)stack;
+		regs->gpr[1] = (unsigned long)stack - STACK_FRAME_MIN_SIZE;
 	regs->trap = 0;
 }
 
diff --git a/compel/arch/riscv64/plugins/include/asm/prologue.h b/compel/arch/riscv64/plugins/include/asm/prologue.h
new file mode 100644
index 000000000..5c22b7b06
--- /dev/null
+++ b/compel/arch/riscv64/plugins/include/asm/prologue.h
@@ -0,0 +1,35 @@
+#ifndef __ASM_PROLOGUE_H__
+#define __ASM_PROLOGUE_H__
+
+#ifndef __ASSEMBLY__
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include <errno.h>
+
+#define sys_recv(sockfd, ubuf, size, flags) sys_recvfrom(sockfd, ubuf, size, flags, NULL, NULL)
+
+typedef struct prologue_init_args {
+	struct sockaddr_un ctl_sock_addr;
+	unsigned int ctl_sock_addr_len;
+
+	unsigned int arg_s;
+	void *arg_p;
+
+	void *sigframe;
+} prologue_init_args_t;
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Reserve enough space for sigframe.
+ *
+ * FIXME: this should rather be taken from the sigframe header.
+ */
+#define PROLOGUE_SGFRAME_SIZE 4096
+
+#define PROLOGUE_INIT_ARGS_SIZE 1024
+
+#endif /* __ASM_PROLOGUE_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/include/asm/syscall-types.h b/compel/arch/riscv64/plugins/include/asm/syscall-types.h
new file mode 100644
index 000000000..b9740a9ee
--- /dev/null
+++ b/compel/arch/riscv64/plugins/include/asm/syscall-types.h
@@ -0,0 +1,28 @@
+#ifndef COMPEL_ARCH_SYSCALL_TYPES_H__
+#define COMPEL_ARCH_SYSCALL_TYPES_H__
+
+#define SA_RESTORER 0x04000000
+
+typedef void rt_signalfn_t(int, siginfo_t *, void *);
+typedef rt_signalfn_t *rt_sighandler_t;
+
+typedef void rt_restorefn_t(void);
+typedef rt_restorefn_t *rt_sigrestore_t;
+
+#define _KNSIG	  64 // number of signals
+#define _NSIG_BPW 64 // number of signals per word
+
+#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW)
+
+typedef struct {
+	unsigned long sig[_KNSIG_WORDS];
+} k_rtsigset_t;
+
+typedef struct {
+	rt_sighandler_t rt_sa_handler;
+	unsigned long rt_sa_flags;
+	rt_sigrestore_t rt_sa_restorer;
+	k_rtsigset_t rt_sa_mask;
+} rt_sigaction_t;
+
+#endif /* COMPEL_ARCH_SYSCALL_TYPES_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/include/features.h b/compel/arch/riscv64/plugins/include/features.h
new file mode 100644
index 000000000..274cee52a
--- /dev/null
+++ b/compel/arch/riscv64/plugins/include/features.h
@@ -0,0 +1,4 @@
+#ifndef __COMPEL_ARCH_FEATURES_H
+#define __COMPEL_ARCH_FEATURES_H
+
+#endif /* __COMPEL_ARCH_FEATURES_H */
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/std/parasite-head.S b/compel/arch/riscv64/plugins/std/parasite-head.S
new file mode 100644
index 000000000..3e9d272e3
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/parasite-head.S
@@ -0,0 +1,7 @@
+#include "common/asm/linkage.h"
+
+	.section .head.text, "ax"
+ENTRY(__export_parasite_head_start)
+	jal parasite_service
+	ebreak
+END(__export_parasite_head_start)
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/std/syscalls/Makefile.syscalls b/compel/arch/riscv64/plugins/std/syscalls/Makefile.syscalls
new file mode 100644
index 000000000..5af35bcb4
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/syscalls/Makefile.syscalls
@@ -0,0 +1,59 @@
+ccflags-y		+= -iquote $(PLUGIN_ARCH_DIR)/std/syscalls/
+asflags-y		+= -iquote $(PLUGIN_ARCH_DIR)/std/syscalls/
+
+sys-types		:= $(obj)/include/uapi/std/syscall-types.h
+sys-codes		:= $(obj)/include/uapi/std/syscall-codes.h
+sys-proto		:= $(obj)/include/uapi/std/syscall.h
+
+sys-def			:= $(PLUGIN_ARCH_DIR)/std/syscalls/syscall.def
+sys-asm-common-name	:= std/syscalls/syscall-common.S
+sys-asm-common		:= $(PLUGIN_ARCH_DIR)/$(sys-asm-common-name)
+sys-asm-types		:= $(obj)/include/uapi/std/asm/syscall-types.h
+sys-exec-tbl		 = $(PLUGIN_ARCH_DIR)/std/sys-exec-tbl.c
+
+sys-gen			:= $(PLUGIN_ARCH_DIR)/std/syscalls/gen-syscalls.pl
+sys-gen-tbl		:= $(PLUGIN_ARCH_DIR)/std/syscalls/gen-sys-exec-tbl.pl
+
+sys-asm			:= ./$(PLUGIN_ARCH_DIR)/std/syscalls/syscalls.S
+std-lib-y		+= $(sys-asm:.S=).o
+
+ifeq ($(ARCH),arm)
+arch_bits		:= 32
+else
+arch_bits		:= 64
+endif
+
+sys-exec-tbl		:= sys-exec-tbl.c
+
+$(sys-asm) $(sys-types) $(sys-codes) $(sys-proto): $(sys-gen) $(sys-def) $(sys-asm-common) $(sys-asm-types)
+	$(E) "  GEN     " $@
+	$(Q) perl				\
+		$(sys-gen)			\
+		$(sys-def)			\
+		$(sys-codes)			\
+		$(sys-proto)			\
+		$(sys-asm)			\
+		$(sys-asm-common-name)		\
+		$(sys-types)			\
+		$(arch_bits)
+
+$(sys-asm:.S=).o: $(sys-asm)
+
+$(sys-exec-tbl): $(sys-gen-tbl) $(sys-def)
+	$(E) "  GEN     " $@
+	$(Q) perl				\
+		$(sys-gen-tbl)			\
+		$(sys-def)			\
+		$(sys-exec-tbl)			\
+		$(arch_bits)
+
+$(sys-asm-types): $(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h
+	$(call msg-gen, $@)
+	$(Q) ln -s ../../../../../../$(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h $(sys-asm-types)
+	$(Q) ln -s ../../../../../$(PLUGIN_ARCH_DIR)/std/syscalls/syscall-aux.S $(obj)/include/uapi/std/syscall-aux.S
+	$(Q) ln -s ../../../../../$(PLUGIN_ARCH_DIR)/std/syscalls/syscall-aux.h $(obj)/include/uapi/std/syscall-aux.h
+
+std-headers-deps	+= $(sys-asm) $(sys-codes) $(sys-proto) $(sys-asm-types) $(sys-codes)
+mrproper-y		+= $(std-headers-deps)
+mrproper-y		+= $(obj)/include/uapi/std/syscall-aux.S
+mrproper-y		+= $(obj)/include/uapi/std/syscall-aux.h
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/std/syscalls/gen-sys-exec-tbl.pl b/compel/arch/riscv64/plugins/std/syscalls/gen-sys-exec-tbl.pl
new file mode 100755
index 000000000..61a807eb6
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/syscalls/gen-sys-exec-tbl.pl
@@ -0,0 +1,43 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+my $in		= $ARGV[0];
+my $tblout	= $ARGV[1];
+my $bits	= $ARGV[2];
+
+my $code	= "code$bits";
+
+open TBLOUT,	">", $tblout	or die $!;
+open IN,		"<", $in		or die $!;
+
+print TBLOUT "/* Autogenerated, don't edit */\n";
+print TBLOUT "static struct syscall_exec_desc sc_exec_table[] = {\n";
+
+for (<IN>) {
+	if ($_ =~ /\#/) {
+		next;
+	}
+
+	my $sys_name;
+	my $sys_num;
+
+	if (/(?<name>\S+)\s+(?<alias>\S+)\s+(?<code64>\d+|\!)\s+(?<code32>(?:\d+|\!))\s+\((?<args>.+)\)/) {
+		$sys_name = $+{alias};
+	} elsif (/(?<name>\S+)\s+(?<code64>\d+|\!)\s+(?<code32>(?:\d+|\!))\s+\((?<args>.+)\)/) {
+		$sys_name = $+{name};
+	} else {
+		unlink $tblout;
+		die "Invalid syscall definition file: invalid entry $_\n";
+	}
+
+	$sys_num = $+{$code};
+
+	if ($sys_num ne "!") {
+		print TBLOUT "SYSCALL($sys_name, $sys_num)\n";
+	}
+}
+
+print TBLOUT " 	{ }, /* terminator */\n";	# empty entry closes the table
+print TBLOUT "};\n";				# end the generated C file with a newline
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/std/syscalls/gen-syscalls.pl b/compel/arch/riscv64/plugins/std/syscalls/gen-syscalls.pl
new file mode 100755
index 000000000..a53f1962f
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/syscalls/gen-syscalls.pl
@@ -0,0 +1,99 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+my $in         = $ARGV[0];
+my $codesout   = $ARGV[1];
+my $codes      = $ARGV[1];
+$codes         =~ s/.*include\/uapi\//compel\/plugins\//g;
+my $protosout  = $ARGV[2];
+my $protos     = $ARGV[2];
+$protos        =~ s/.*include\/uapi\//compel\/plugins\//g;
+my $asmout     = $ARGV[3];
+my $asmcommon  = $ARGV[4];
+my $prototypes = $ARGV[5];
+$prototypes    =~ s/.*include\/uapi\//compel\/plugins\//g;
+my $bits       = $ARGV[6];
+
+my $codesdef   = $codes;
+$codesdef      =~ tr/.\-\//_/;
+my $protosdef  = $protos;
+$protosdef     =~ tr/.\-\//_/;
+my $code       = "code$bits";
+my $need_aux   = 0;
+
+unlink $codesout;
+unlink $protosout;
+unlink $asmout;
+
+open CODESOUT,	">", $codesout	or die $!;
+open PROTOSOUT, ">", $protosout or die $!;
+open ASMOUT,	">", $asmout	or die $!;
+open IN,	"<", $in	or die $!;
+
+print CODESOUT <<"END";
+/* Autogenerated, don't edit */
+#ifndef $codesdef
+#define $codesdef
+END
+
+print PROTOSOUT <<"END";
+/* Autogenerated, don't edit */
+#ifndef $protosdef
+#define $protosdef
+#include <$prototypes>
+#include <$codes>
+END
+
+print ASMOUT <<"END";
+/* Autogenerated, don't edit */
+#include <$codes>
+#include "$asmcommon"
+END
+
+
+for (<IN>) {
+	if ($_ =~ /\#/) {
+		next;
+	}
+
+	my $code_macro;
+	my $sys_macro;
+	my $sys_name;
+
+	if (/(?<name>\S+)\s+(?<alias>\S+)\s+(?<code64>\d+|\!)\s+(?<code32>(?:\d+|\!))\s+\((?<args>.+)\)/) {
+		$code_macro = "__NR_$+{name}";
+		$sys_macro  = "SYS_$+{name}";
+		$sys_name   = "sys_$+{alias}";
+	} elsif (/(?<name>\S+)\s+(?<code64>\d+|\!)\s+(?<code32>(?:\d+|\!))\s+\((?<args>.+)\)/) {
+		$code_macro = "__NR_$+{name}";
+		$sys_macro  = "SYS_$+{name}";
+		$sys_name   = "sys_$+{name}";
+	} else {
+		unlink $codesout;
+		unlink $protosout;
+		unlink $asmout;
+
+		die "Invalid syscall definition file: invalid entry $_\n";
+	}
+
+	if ($+{$code} ne "!") {
+		print CODESOUT "#ifndef $code_macro\n#define $code_macro $+{$code}\n#endif\n";
+		print CODESOUT "#ifndef $sys_macro\n#define $sys_macro $code_macro\n#endif\n";
+		print ASMOUT "syscall $sys_name, $code_macro\n";
+
+	} else {
+		$need_aux = 1;
+	}
+
+	print PROTOSOUT "extern long $sys_name($+{args});\n";
+}
+
+if ($need_aux == 1) {
+	print ASMOUT   "#include <compel/plugins/std/syscall-aux.S>\n";
+	print CODESOUT "#include <compel/plugins/std/syscall-aux.h>\n";
+}
+
+print CODESOUT  "#endif /* $codesdef */\n";	# generated headers must end with a newline
+print PROTOSOUT "#endif /* $protosdef */\n";
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.S b/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.S
new file mode 100644
index 000000000..04160b7ac
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.S
@@ -0,0 +1,42 @@
+/**
+ * This source contains emulation of syscalls that are not implemented
+ * in the riscv64 Linux kernel.  Each wrapper moves its arguments up by
+ * one register, loads dirfd = AT_FDCWD (-100) into a0 and tail-jumps to
+ * the matching *at() syscall stub.
+ */
+
+/* sys_open(filename, flags, mode) -> sys_openat(AT_FDCWD, filename, flags, mode) */
+ENTRY(sys_open)
+	add a3, x0, a2
+	add a2, x0, a1
+	add a1, x0, a0
+	addi a0, x0, -100
+	j   sys_openat
+END(sys_open)
+
+
+/* sys_mkdir(name, mode) -> sys_mkdirat(AT_FDCWD, name, mode) */
+ENTRY(sys_mkdir)
+	add a2, x0, a1
+	add a1, x0, a0
+	addi a0, x0, -100
+	j   sys_mkdirat
+END(sys_mkdir)
+
+
+/* sys_rmdir(name) -> sys_unlinkat(AT_FDCWD, name, AT_REMOVEDIR) */
+ENTRY(sys_rmdir)
+	addi a2, x0, 0x200		// flags = AT_REMOVEDIR
+	add a1, x0, a0
+	addi a0, x0, -100
+	j   sys_unlinkat
+END(sys_rmdir)
+
+
+/* sys_unlink(pathname) -> sys_unlinkat(AT_FDCWD, pathname, 0) */
+ENTRY(sys_unlink)
+	addi a2, x0, 0		// flags = 0
+	add a1, x0, a0
+	addi a0, x0, -100
+	j   sys_unlinkat
+END(sys_unlink)
diff --git a/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.h b/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.h
new file mode 100644
index 000000000..881765bbb
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.h
@@ -0,0 +1,3 @@
+#ifndef __NR_openat
+#define __NR_openat 56
+#endif
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/std/syscalls/syscall-common.S b/compel/arch/riscv64/plugins/std/syscalls/syscall-common.S
new file mode 100644
index 000000000..fdef3b47a
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/syscalls/syscall-common.S
@@ -0,0 +1,17 @@
+#include "common/asm/linkage.h"
+
+syscall_common:
+	ecall
+	ret
+
+.macro syscall name, nr
+	ENTRY(\name)
+		li	a7, \nr
+		j	syscall_common
+	END(\name)
+.endm
+
+ENTRY(__cr_restore_rt)
+	li	a7, __NR_rt_sigreturn
+	ecall
+END(__cr_restore_rt)
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/std/syscalls/syscall.def b/compel/arch/riscv64/plugins/std/syscalls/syscall.def
new file mode 100644
index 000000000..967f097f9
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/syscalls/syscall.def
@@ -0,0 +1,125 @@
+#
+# System calls table, please make sure the table consists of only the syscalls
+# really used somewhere in the project.
+#
+# The template is (name and arguments are optional if you need only __NR_x
+# defined, but no real entry point in syscalls lib).
+#
+# name/alias			code64	code32	arguments
+# -----------------------------------------------------------------------
+#
+read				63	3	(int fd, void *buf, unsigned long count)
+write				64	4	(int fd, const void *buf, unsigned long count)
+open				!	5	(const char *filename, unsigned long flags, unsigned long mode)
+close				57	6	(int fd)
+lseek				62	19	(int fd, unsigned long offset, unsigned long origin)
+mmap				222	!	(void *addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long offset)
+mprotect			226	125	(const void *addr, unsigned long len, unsigned long prot)
+munmap				215	91	(void *addr, unsigned long len)
+brk				214	45	(void *addr)
+rt_sigaction	sigaction	134	174	(int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact, size_t sigsetsize)
+rt_sigprocmask	sigprocmask	135	175	(int how, k_rtsigset_t *set, k_rtsigset_t *old, size_t sigsetsize)
+rt_sigreturn			139	173	(void)
+ioctl				29	54	(unsigned int fd, unsigned int cmd, unsigned long arg)
+pread64				67	180	(unsigned int fd, char *buf, size_t count, loff_t pos)
+ptrace				117	26	(long request, pid_t pid, void *addr, void *data)
+mremap				216	163	(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flag, unsigned long new_addr)
+mincore				232	219	(void *addr, unsigned long size, unsigned char *vec)
+madvise				233	220	(unsigned long start, size_t len, int behavior)
+shmat				196	305	(int shmid, void *shmaddr, int shmflag)
+pause				1061	29	(void)
+nanosleep			101	162	(struct timespec *req, struct timespec *rem)
+getitimer			102	105	(int which, const struct itimerval *val)
+setitimer			103	104	(int which, const struct itimerval *val, struct itimerval *old)
+getpid				172	20	(void)
+socket				198	281	(int domain, int type, int protocol)
+connect				203	283	(int sockfd, struct sockaddr *addr, int addrlen)
+sendto				206	290	(int sockfd, void *buff, size_t len, unsigned int flags, struct sockaddr *addr, int addr_len)
+recvfrom			207	292	(int sockfd, void *ubuf, size_t size, unsigned int flags, struct sockaddr *addr, int *addr_len)
+sendmsg				211	296	(int sockfd, const struct msghdr *msg, int flags)
+recvmsg				212	297	(int sockfd, struct msghdr *msg, int flags)
+shutdown			210	293	(int sockfd, int how)
+bind				235	282	(int sockfd, const struct sockaddr *addr, int addrlen)
+setsockopt			208	294	(int sockfd, int level, int optname, const void *optval, socklen_t optlen)
+getsockopt			209	295	(int sockfd, int level, int optname, const void *optval, socklen_t *optlen)
+clone				220	120	(unsigned long flags, void *child_stack, void *parent_tid, unsigned long newtls, void *child_tid)
+exit				93	1	(unsigned long error_code)
+wait4				260	114	(int pid, int *status, int options, struct rusage *ru)
+waitid				95	280	(int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
+kill				129	37	(long pid, int sig)
+fcntl				25	55	(int fd, int type, long arg)
+flock				32	143	(int fd, unsigned long cmd)
+mkdir				!	39	(const char *name, int mode)
+rmdir				!	40	(const char *name)
+unlink				!	10	(char *pathname)
+readlinkat			78	332	(int fd, const char *path, char *buf, int bufsize)
+umask				166	60	(int mask)
+getgroups			158	205	(int gsize, unsigned int *groups)
+setgroups			159	206	(int gsize, unsigned int *groups)
+setresuid			147	164	(int uid, int euid, int suid)
+getresuid			148	165	(int *uid, int *euid, int *suid)
+setresgid			149	170	(int gid, int egid, int sgid)
+getresgid			150	171	(int *gid, int *egid, int *sgid)
+getpgid				155	132	(pid_t pid)
+setfsuid			151	138	(int fsuid)
+setfsgid			152	139	(int fsgid)
+getsid				156	147	(void)
+capget				90	184	(struct cap_header *h, struct cap_data *d)
+capset				91	185	(struct cap_header *h, struct cap_data *d)
+rt_sigqueueinfo			138	178	(pid_t pid, int sig, siginfo_t *info)
+setpriority			140	97	(int which, int who, int nice)
+sched_setscheduler		119	156	(int pid, int policy, struct sched_param *p)
+sigaltstack			132	186	(const void *uss, void *uoss)
+personality			92	136	(unsigned int personality)
+prctl				167	172	(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5)
+arch_prctl			!	17	(int option, unsigned long addr)
+setrlimit			164	75	(int resource, struct krlimit *rlim)
+mount				40	21	(char *dev_name, char *dir_name, char *type, unsigned long flags, void *data)
+umount2				39	52	(char *name, int flags)
+gettid				178	224	(void)
+futex				98	240	(uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3)
+set_tid_address			96	256	(int *tid_addr)
+restart_syscall			128	0	(void)
+timer_create			107	257	(clockid_t which_clock, struct sigevent *timer_event_spec, kernel_timer_t *created_timer_id)
+timer_settime			110	258	(kernel_timer_t timer_id, int flags, const struct itimerspec *new_setting, struct itimerspec *old_setting)
+timer_gettime			108	259	(int timer_id, const struct itimerspec *setting)
+timer_getoverrun		109	260	(int timer_id)
+timer_delete			111	261	(kernel_timer_t timer_id)
+clock_gettime			113	263	(clockid_t which_clock, struct timespec *tp)
+exit_group			94	248	(int error_code)
+set_robust_list			99	338	(struct robust_list_head *head, size_t len)
+get_robust_list			100	339	(int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
+signalfd4			74	355	(int fd, k_rtsigset_t *mask, size_t sizemask, int flags)
+rt_tgsigqueueinfo		240	363	(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
+vmsplice			75	343	(int fd, const struct iovec *iov, unsigned long nr_segs, unsigned int flags)
+timerfd_settime			86	353	(int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr)
+fanotify_init			262	367	(unsigned int flags, unsigned int event_f_flags)
+fanotify_mark			263	368	(int fanotify_fd, unsigned int flags, uint64_t mask, int dfd, const char *pathname)
+open_by_handle_at		265	371	(int mountdirfd, struct file_handle *handle, int flags)
+setns				268	375	(int fd, int nstype)
+kcmp				272	378	(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
+openat				56	322	(int dirfd, const char *pathname, int flags, mode_t mode)
+mkdirat				34	323	(int dirfd, const char *pathname, mode_t mode)
+unlinkat			35	328	(int dirfd, const char *pathname, int flags)
+memfd_create			279	385	(const char *name, unsigned int flags)
+io_setup			0	243	(unsigned nr_events, aio_context_t *ctx)
+io_submit			2	246	(aio_context_t ctx_id, long nr, struct iocb **iocbpp)
+io_getevents			4	245	(aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo)
+seccomp				277	383	(unsigned int op, unsigned int flags, const char *uargs)
+gettimeofday			169	78	(struct timeval *tv, struct timezone *tz)
+preadv_raw			69	361	(int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h)
+userfaultfd			282	388	(int flags)
+fallocate			47	352	(int fd, int mode, loff_t offset, loff_t len)
+cacheflush			!	983042	(void *start, void *end, int flags)
+ppoll				73	336	(struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize)
+fsopen				430	430	(char *fsname, unsigned int flags)
+fsconfig			431	431	(int fd, unsigned int cmd, const char *key, const char *value, int aux)
+fsmount				432	432	(int fd, unsigned int flags, unsigned int attr_flags)
+clone3				435	435	(struct clone_args *uargs, size_t size)
+pidfd_open			434	434	(pid_t pid, unsigned int flags)
+pidfd_getfd			438	438	(int pidfd, int targetfd, unsigned int flags)
+rseq				293 	293 	(void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+move_mount          		429	429	(int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, int flags)
+open_tree			428	428	(int dirfd, const char *pathname, unsigned int flags)
+openat2				437	437	(int dirfd, char *pathname, struct open_how *how, size_t size)
+membarrier 			283	283	(int cmd, unsigned int flags, int cpu_id)
diff --git a/compel/arch/riscv64/scripts/compel-pack.lds.S b/compel/arch/riscv64/scripts/compel-pack.lds.S
new file mode 100644
index 000000000..a61235b44
--- /dev/null
+++ b/compel/arch/riscv64/scripts/compel-pack.lds.S
@@ -0,0 +1,32 @@
+OUTPUT_ARCH(riscv)
+EXTERN(__export_parasite_head_start)
+
+SECTIONS
+{
+	.crblob 0x0 : {
+		*(.head.text)
+		ASSERT(DEFINED(__export_parasite_head_start),
+			"Symbol __export_parasite_head_start is missing");
+		*(.text*)
+		. = ALIGN(32);
+		*(.data*)
+		. = ALIGN(32);
+		*(.rodata*)
+		. = ALIGN(32);
+		*(.bss*)
+		. = ALIGN(32);
+		*(.got*)
+		. = ALIGN(32);
+		*(.toc*)
+		. = ALIGN(32);
+	} =0x00000000,
+
+	/DISCARD/ : {
+		*(.debug*)
+		*(.comment*)
+		*(.note*)
+		*(.group*)
+		*(.eh_frame*)
+		*(*)
+	}
+}
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/cpu.c b/compel/arch/riscv64/src/lib/cpu.c
new file mode 100644
index 000000000..9a0291f70
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/cpu.c
@@ -0,0 +1,102 @@
+#include <string.h>
+#include <stdbool.h>
+
+#include "compel-cpu.h"
+
+#include "common/bitops.h"
+
+#include "log.h"
+
+#undef LOG_PREFIX
+#define LOG_PREFIX "cpu: "
+
+/*
+ * CPU feature handling for riscv64.  The capability helpers in this file
+ * are stubs: nothing is probed or recorded, and every test reports the
+ * feature as absent.
+ */
+
+/* Runtime CPU info cache, filled in by fetch_rt_cpuinfo(). */
+static compel_cpuinfo_t rt_info;
+
+/* Fill rt_info through compel_cpuid() on the first call; later calls are no-ops. */
+static void fetch_rt_cpuinfo(void)
+{
+	static bool rt_info_done = false;
+
+	if (!rt_info_done) {
+		compel_cpuid(&rt_info);
+		rt_info_done = true;
+	}
+}
+
+/* Stub: leaves @info untouched. */
+void compel_set_cpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+}
+
+/* Stub: leaves @info untouched. */
+void compel_clear_cpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+}
+
+/* Stub: always returns 0, i.e. @feature is never reported as present. */
+int compel_test_cpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+	return 0;
+}
+
+/* Stub: always returns 0, i.e. @feature is never reported as present. */
+int compel_test_fpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+	return 0;
+}
+
+/* Stub: does not fill @info and always returns 0. */
+int compel_cpuid(compel_cpuinfo_t *info)
+{
+	return 0;
+}
+
+/* Test @feature against the cached runtime CPU info. */
+bool compel_cpu_has_feature(unsigned int feature)
+{
+	fetch_rt_cpuinfo();
+	return compel_test_cpu_cap(&rt_info, feature);
+}
+
+/* Test FPU @feature against the cached runtime CPU info. */
+bool compel_fpu_has_feature(unsigned int feature)
+{
+	fetch_rt_cpuinfo();
+	return compel_test_fpu_cap(&rt_info, feature);
+}
+
+/* Always returns 0 for any @feature. */
+uint32_t compel_fpu_feature_size(unsigned int feature)
+{
+	fetch_rt_cpuinfo();
+	return 0;
+}
+
+/* Always returns 0 for any @feature. */
+uint32_t compel_fpu_feature_offset(unsigned int feature)
+{
+	fetch_rt_cpuinfo();
+	return 0;
+}
+
+/* Forward to compel_clear_cpu_cap() on the cached runtime CPU info. */
+void compel_cpu_clear_feature(unsigned int feature)
+{
+	fetch_rt_cpuinfo();
+	/* Plain call: a void function must not return an expression (ISO C). */
+	compel_clear_cpu_cap(&rt_info, feature);
+}
+
+/* Copy the cached runtime CPU info into @c. */
+void compel_cpu_copy_cpuinfo(compel_cpuinfo_t *c)
+{
+	fetch_rt_cpuinfo();
+	memcpy(c, &rt_info, sizeof(rt_info));
+}
diff --git a/compel/arch/riscv64/src/lib/handle-elf-host.c b/compel/arch/riscv64/src/lib/handle-elf-host.c
new file mode 120000
index 000000000..fe4611886
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/handle-elf-host.c
@@ -0,0 +1 @@
+handle-elf.c
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/handle-elf.c b/compel/arch/riscv64/src/lib/handle-elf.c
new file mode 100644
index 000000000..22420bc78
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/handle-elf.c
@@ -0,0 +1,43 @@
+#include <string.h>
+#include <errno.h>
+
+#include "handle-elf.h"
+#include "piegen.h"
+#include "log.h"
+
+/* e_ident of a 64-bit, little-endian, ELF version 1 object. */
+static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = {
+	0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+/* e_ident of a 64-bit, big-endian, ELF version 1 object. */
+static const unsigned char __maybe_unused elf_ident_64_be[EI_NIDENT] = {
+	0x7f, 0x45, 0x4c, 0x46, 0x02, 0x02, 0x01, 0x00, /* clang-format */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+/*
+ * Check that the @size bytes at @mem start with the ELF identification
+ * matching the byte order this file was compiled for and, if so, pass the
+ * image to handle_elf_riscv64().
+ *
+ * Returns the result of handle_elf_riscv64(), or -EINVAL when the buffer
+ * is too short or the identification does not match.
+ */
+int handle_binary(void *mem, size_t size)
+{
+	const unsigned char *elf_ident =
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+		elf_ident_64_le;
+#else
+		elf_ident_64_be;
+#endif
+
+	/* Do not let memcmp() read past the end of a truncated input. */
+	if (size >= sizeof(elf_ident_64_le) && memcmp(mem, elf_ident, sizeof(elf_ident_64_le)) == 0)
+		return handle_elf_riscv64(mem, size);
+
+	pr_err("Unsupported Elf format detected\n");
+	return -EINVAL;
+}
diff --git a/compel/arch/riscv64/src/lib/include/cpu.h b/compel/arch/riscv64/src/lib/include/cpu.h
new file mode 100644
index 000000000..e69de29bb
diff --git a/compel/arch/riscv64/src/lib/include/handle-elf.h b/compel/arch/riscv64/src/lib/include/handle-elf.h
new file mode 100644
index 000000000..582770583
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/handle-elf.h
@@ -0,0 +1,12 @@
+#ifndef COMPEL_HANDLE_ELF_H__
+#define COMPEL_HANDLE_ELF_H__
+
+#include "elf64-types.h"
+
+#define __handle_elf handle_elf_riscv64
+#define ELF_RISCV
+#define arch_is_machine_supported(e_machine) (e_machine == EM_RISCV)
+
+extern int handle_elf_riscv64(void *mem, size_t size);
+
+#endif /* COMPEL_HANDLE_ELF_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/syscall.h b/compel/arch/riscv64/src/lib/include/syscall.h
new file mode 100644
index 000000000..53f10525d
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/syscall.h
@@ -0,0 +1,8 @@
+#ifndef __COMPEL_SYSCALL_H__
+#define __COMPEL_SYSCALL_H__
+#define __NR(syscall, compat)   \
+	({                      \
+		(void)compat;   \
+		__NR_##syscall; \
+	})
+#endif
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/breakpoints.h b/compel/arch/riscv64/src/lib/include/uapi/asm/breakpoints.h
new file mode 100644
index 000000000..f2ba799cb
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/uapi/asm/breakpoints.h
@@ -0,0 +1,15 @@
+#ifndef __COMPEL_BREAKPOINTS_H__
+#define __COMPEL_BREAKPOINTS_H__
+#define ARCH_SI_TRAP TRAP_BRKPT
+
+static inline int ptrace_set_breakpoint(pid_t pid, void *addr)
+{
+	return 0;
+}
+
+static inline int ptrace_flush_breakpoints(pid_t pid)
+{
+	return 0;
+}
+
+#endif
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/cpu.h b/compel/arch/riscv64/src/lib/include/uapi/asm/cpu.h
new file mode 100644
index 000000000..ac58567e3
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/uapi/asm/cpu.h
@@ -0,0 +1,7 @@
+#ifndef UAPI_COMPEL_ASM_CPU_H__
+#define UAPI_COMPEL_ASM_CPU_H__
+
+typedef struct {
+} compel_cpuinfo_t;
+
+#endif /* UAPI_COMPEL_ASM_CPU_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/fpu.h b/compel/arch/riscv64/src/lib/include/uapi/asm/fpu.h
new file mode 100644
index 000000000..a74decc23
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/uapi/asm/fpu.h
@@ -0,0 +1,4 @@
+#ifndef __CR_ASM_FPU_H__
+#define __CR_ASM_FPU_H__
+
+#endif /* __CR_ASM_FPU_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/riscv64/src/lib/include/uapi/asm/infect-types.h
new file mode 100644
index 000000000..192810cac
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/uapi/asm/infect-types.h
@@ -0,0 +1,52 @@
+#ifndef UAPI_COMPEL_ASM_TYPES_H__
+#define UAPI_COMPEL_ASM_TYPES_H__
+
+#include <stdint.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <asm/ptrace.h>
+
+#define SIGMAX	   64
+#define SIGMAX_OLD 31
+
+/*
+ * Aliases for types declared in the Linux kernel header arch/riscv/include/uapi/asm/ptrace.h
+ *
+ * A thread RISC-V CPU context
+ */
+typedef struct user_regs_struct user_regs_struct_t;
+typedef struct __riscv_d_ext_state user_fpregs_struct_t;
+
+#define __compel_arch_fetch_thread_area(tid, th) 0
+#define compel_arch_fetch_thread_area(tctl)	 0
+#define compel_arch_get_tls_task(ctl, tls)
+#define compel_arch_get_tls_thread(tctl, tls)
+
+#define REG_RES(registers)	   ((uint64_t)(registers).a0)
+#define REG_IP(registers)	   ((uint64_t)(registers).pc)
+#define SET_REG_IP(registers, val) ((registers).pc = (val))
+
+/*
+ * REG_SP is also defined in riscv64-linux-gnu/include/sys/ucontext.h
+ * with a different meaning, and it's not used in CRIU. So we have to
+ * undefine it here.
+ */
+#ifdef REG_SP
+#undef REG_SP
+#endif
+
+#define REG_SP(registers) ((uint64_t)((registers).sp))
+
+#define REG_SYSCALL_NR(registers) ((uint64_t)(registers).a7)
+
+#define user_regs_native(pregs) true
+
+#define ARCH_SI_TRAP TRAP_BRKPT
+
+#define __NR(syscall, compat)   \
+	({                      \
+		(void)compat;   \
+		__NR_##syscall; \
+	})
+
+#endif /* UAPI_COMPEL_ASM_TYPES_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/instruction_formats.h b/compel/arch/riscv64/src/lib/include/uapi/asm/instruction_formats.h
new file mode 100644
index 000000000..e231d0465
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/uapi/asm/instruction_formats.h
@@ -0,0 +1,26 @@
+#ifndef COMPEL_RELOCATIONS_H__
+#define COMPEL_RELOCATIONS_H__
+
+#include <stdint.h>
+
+static inline uint32_t riscv_b_imm(uint32_t val) /* val bits 12|10:5|4:1|11 -> result bits 31|30:25|11:8|7 */
+{
+	return ((val >> 12) & 0x1) << 31 | ((val >> 5) & 0x3f) << 25 | ((val >> 1) & 0xf) << 8 | ((val >> 11) & 0x1) << 7;
+}
+
+static inline uint32_t riscv_i_imm(uint32_t val) /* low 12 bits of val -> result bits 31:20 */
+{
+	return (val & 0xfff) << 20;
+}
+
+static inline uint32_t riscv_u_imm(uint32_t val) /* keeps bits 31:12 of val, clears bits 11:0 */
+{
+	return (val >> 12) << 12;
+}
+
+static inline uint32_t riscv_j_imm(uint32_t val) /* val bits 20|10:1|11|19:12 -> result bits 31|30:21|20|19:12 */
+{
+	return ((val >> 20) & 0x1) << 31 | ((val >> 1) & 0x3ff) << 21 | ((val >> 11) & 0x1) << 20 | ((val >> 12) & 0xff) << 12;
+}
+
+#endif /* COMPEL_RELOCATIONS_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/processor-flags.h b/compel/arch/riscv64/src/lib/include/uapi/asm/processor-flags.h
new file mode 100644
index 000000000..e40fb6fce
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/uapi/asm/processor-flags.h
@@ -0,0 +1,4 @@
+#ifndef UAPI_COMPEL_ASM_PROCESSOR_FLAGS_H__
+#define UAPI_COMPEL_ASM_PROCESSOR_FLAGS_H__
+
+#endif /* UAPI_COMPEL_ASM_PROCESSOR_FLAGS_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/riscv64/src/lib/include/uapi/asm/sigframe.h
new file mode 100644
index 000000000..761a08f62
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/uapi/asm/sigframe.h
@@ -0,0 +1,68 @@
+#ifndef UAPI_COMPEL_ASM_SIGFRAME_H__
+#define UAPI_COMPEL_ASM_SIGFRAME_H__
+
+#include <sys/ucontext.h>
+
+#include <stdint.h>
+
+#include <signal.h>
+
+/* Copied from the kernel header arch/riscv/include/uapi/asm/sigcontext.h */
+/*
+ * Signal context structure
+ *
+ * This contains the context saved before a signal handler is invoked;
+ * it is restored by sys_sigreturn / sys_rt_sigreturn.
+ */
+// struct sigcontext {
+// 	struct user_regs_struct sc_regs;
+// 	union __riscv_fp_state sc_fpregs;
+// 	/*
+// 	 * 4K + 128 reserved for vector state and future expansion.
+// 	 * This space is enough to store the vector context whose VLENB
+// 	 * is less or equal to 128.
+// 	 * (The size of the vector context is 4144 byte as VLENB is 128)
+// 	 */
+// 	__u8 __reserved[4224] __attribute__((__aligned__(16)));
+// };
+
+#define rt_sigcontext sigcontext
+
+#include <compel/sigframe-common.h>
+
+/* Copied from the kernel source arch/riscv/kernel/signal.c */
+struct rt_sigframe {
+	siginfo_t info; /* siginfo part of the signal frame */
+	ucontext_t uc; /* user context, e.g. the signal mask (uc_sigmask) and the GP registers (uc_mcontext) */
+};
+
+/*
+ * Emit inline assembly that switches the stack pointer to new_sp and issues
+ * the rt_sigreturn system call, returning from signal-frame context to the
+ * normal execution flow of the process.
+/* clang-format off */
+#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe)					\
+	asm volatile(								\
+			"mv sp, %0\n"	\
+			"li a7,  "__stringify(__NR_rt_sigreturn)" \n"     \
+			"ecall\n"	\
+			:							\
+			: "r"(new_sp)						\
+			: "a7", "memory")
+/* clang-format on */
+
+#define RT_SIGFRAME_UC(rt_sigframe)	 (&(rt_sigframe)->uc) /* argument parenthesized so any pointer expression is safe */
+#define RT_SIGFRAME_REGIP(rt_sigframe)	 ((long unsigned int)(rt_sigframe)->uc.uc_mcontext.__gregs[REG_PC])
+#define RT_SIGFRAME_HAS_FPU(rt_sigframe) 1 /* always reported as present */
+#define RT_SIGFRAME_OFFSET(rt_sigframe)	 0 /* no offset is applied to the frame */
+
+// #define RT_SIGFRAME_SIGCONTEXT(rt_sigframe)  ((struct cr_sigcontext *)&(rt_sigframe)->uc.uc_mcontext)
+// #define RT_SIGFRAME_AUX_CONTEXT(rt_sigframe) ((struct sigcontext *)&(RT_SIGFRAME_SIGCONTEXT(rt_sigframe)->__reserved))
+// #define RT_SIGFRAME_FPU(rt_sigframe)	     (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->fpsimd)
+
+#define rt_sigframe_erase_sigset(sigframe) \
+	memset(&(sigframe)->uc.uc_sigmask, 0, sizeof(k_rtsigset_t)) /* zero sizeof(k_rtsigset_t) bytes of the signal mask */
+#define rt_sigframe_copy_sigset(sigframe, from) \
+	memcpy(&(sigframe)->uc.uc_sigmask, from, sizeof(k_rtsigset_t)) /* overwrite the signal mask with *from */
+
+#endif /* UAPI_COMPEL_ASM_SIGFRAME_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/infect.c b/compel/arch/riscv64/src/lib/infect.c
new file mode 100644
index 000000000..3f3a4b7ec
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/infect.c
@@ -0,0 +1,224 @@
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <linux/elf.h>
+#include <compel/plugins/std/syscall-codes.h>
+#include "common/page.h"
+#include "uapi/compel/asm/infect-types.h"
+#include "log.h"
+#include "errno.h"
+#include "infect.h"
+#include "infect-priv.h"
+
+unsigned __page_size = 0;
+unsigned __page_shift = 0;
+
+/*
+ * Injected syscall instruction
+ */
+const char code_syscall[] = {
+	0x73, 0x00, 0x00, 0x00, /* ecall */
+	0x73, 0x00, 0x10, 0x00	/* ebreak */
+};
+
+static const int code_syscall_aligned = round_up(sizeof(code_syscall), sizeof(long));
+
+static inline void __always_unused __check_code_syscall(void)
+{
+	BUILD_BUG_ON(code_syscall_aligned != BUILTIN_SYSCALL_SIZE);
+	BUILD_BUG_ON(!is_log2(sizeof(code_syscall)));
+}
+
+int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
+{
+	sigframe->uc.uc_mcontext.__gregs[0] = regs->pc; /* slot order: pc, ra, sp, gp, tp, t0-t2, s0-s1, a0-a7, s2-s11, t3-t6 */
+	sigframe->uc.uc_mcontext.__gregs[1] = regs->ra;
+	sigframe->uc.uc_mcontext.__gregs[2] = regs->sp;
+	sigframe->uc.uc_mcontext.__gregs[3] = regs->gp;
+	sigframe->uc.uc_mcontext.__gregs[4] = regs->tp;
+	sigframe->uc.uc_mcontext.__gregs[5] = regs->t0;
+	sigframe->uc.uc_mcontext.__gregs[6] = regs->t1;
+	sigframe->uc.uc_mcontext.__gregs[7] = regs->t2;
+	sigframe->uc.uc_mcontext.__gregs[8] = regs->s0;
+	sigframe->uc.uc_mcontext.__gregs[9] = regs->s1;
+	sigframe->uc.uc_mcontext.__gregs[10] = regs->a0;
+	sigframe->uc.uc_mcontext.__gregs[11] = regs->a1;
+	sigframe->uc.uc_mcontext.__gregs[12] = regs->a2;
+	sigframe->uc.uc_mcontext.__gregs[13] = regs->a3;
+	sigframe->uc.uc_mcontext.__gregs[14] = regs->a4;
+	sigframe->uc.uc_mcontext.__gregs[15] = regs->a5;
+	sigframe->uc.uc_mcontext.__gregs[16] = regs->a6;
+	sigframe->uc.uc_mcontext.__gregs[17] = regs->a7;
+	sigframe->uc.uc_mcontext.__gregs[18] = regs->s2;
+	sigframe->uc.uc_mcontext.__gregs[19] = regs->s3;
+	sigframe->uc.uc_mcontext.__gregs[20] = regs->s4;
+	sigframe->uc.uc_mcontext.__gregs[21] = regs->s5;
+	sigframe->uc.uc_mcontext.__gregs[22] = regs->s6;
+	sigframe->uc.uc_mcontext.__gregs[23] = regs->s7;
+	sigframe->uc.uc_mcontext.__gregs[24] = regs->s8;
+	sigframe->uc.uc_mcontext.__gregs[25] = regs->s9;
+	sigframe->uc.uc_mcontext.__gregs[26] = regs->s10;
+	sigframe->uc.uc_mcontext.__gregs[27] = regs->s11;
+	sigframe->uc.uc_mcontext.__gregs[28] = regs->t3;
+	sigframe->uc.uc_mcontext.__gregs[29] = regs->t4;
+	sigframe->uc.uc_mcontext.__gregs[30] = regs->t5;
+	sigframe->uc.uc_mcontext.__gregs[31] = regs->t6;
+
+	memcpy(sigframe->uc.uc_mcontext.__fpregs.__d.__f, fpregs->f, sizeof(fpregs->f)); /* all of fpregs->f into the frame */
+	sigframe->uc.uc_mcontext.__fpregs.__d.__fcsr = fpregs->fcsr; /* followed by the fcsr value */
+
+	return 0; /* no failure path: always reports success */
+}
+
+int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe)
+{
+	return 0; /* neither frame is touched here; success is reported unconditionally */
+}
+
+int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
+			 void *arg, __maybe_unused unsigned long flags)
+{
+	/*
+	 * Read the FP regset into ext_regs, or into a scratch buffer when the
+	 * caller passed NULL, then hand regs and the FP state to save().
+	 */
+	user_fpregs_struct_t scratch, *fpregs = ext_regs ? ext_regs : &scratch;
+	struct iovec iov = { .iov_base = fpregs, .iov_len = sizeof(*fpregs) };
+
+	pr_info("Dumping FPU registers for %d\n", pid);
+
+	if (ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov)) {
+		pr_perror("Failed to obtain FPU registers for %d", pid);
+		return -1;
+	}
+
+	return save(pid, arg, regs, fpregs);
+}
+
+int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
+{
+	struct iovec iov = { .iov_base = ext_regs, .iov_len = sizeof(*ext_regs) };
+	long rc;
+
+	pr_info("Restoring GP/FPU registers for %d\n", pid);
+
+	/* Only the NT_PRFPREG regset is written back into the tracee here */
+	rc = ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov);
+	if (!rc)
+		return 0;
+	pr_perror("Failed to set FPU registers for %d", pid);
+	return -1;
+}
+
+int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, unsigned long arg1, unsigned long arg2,
+		   unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6)
+{
+	user_regs_struct_t regs = ctl->orig.regs;
+	int err;
+
+	/* Start from the saved registers: a0..a5 take the arguments, a7 the syscall number */
+	regs.a0 = arg1;
+	regs.a1 = arg2;
+	regs.a2 = arg3;
+	regs.a3 = arg4;
+	regs.a4 = arg5;
+	regs.a5 = arg6;
+	regs.a6 = 0;
+	regs.a7 = (unsigned long)nr;
+
+	err = compel_execute_syscall(ctl, &regs, code_syscall);
+	*ret = regs.a0; /* a0 is stored even when err reports a failure */
+	return err;
+}
+
+/*
+ * Run mmap() inside the target (victim) process via compel_syscall().
+ * Used during infection to allocate memory for the parasite code.
+ */
+void *remote_mmap(struct parasite_ctl *ctl, void *addr, size_t length, int prot, int flags, int fd, off_t offset)
+{
+	long mapped;
+
+	/* A failed injection and a negative syscall result both yield NULL */
+	if (compel_syscall(ctl, __NR_mmap, &mapped, (unsigned long)addr, length, prot, flags, fd, offset) < 0)
+		return NULL;
+	if (mapped < 0)
+		return NULL;
+	return (void *)mapped;
+}
+
+void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs)
+{
+	if (stack) /* a NULL stack leaves the current sp untouched */
+		regs->sp = (unsigned long)stack;
+	regs->pc = new_ip;
+}
+
+bool arch_can_dump_task(struct parasite_ctl *ctl)
+{
+	/*
+	 * TODO: Add proper check here.
+	 */
+	return true; /* until then every task is reported as dumpable; ctl is not inspected */
+}
+
+/*
+ * Fetch the tracee's signal alternate stack (sigaltstack), the separate
+ * memory area signal handlers may run on instead of the main stack, by
+ * injecting sigaltstack(NULL, &s->uc.uc_stack) through compel_syscall().
+ */
+int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s)
+{
+	unsigned long old_ss = (unsigned long)&s->uc.uc_stack;
+	long sys_ret;
+	int err = compel_syscall(ctl, __NR_sigaltstack, &sys_ret, 0, old_ss, 0, 0, 0, 0);
+
+	return err ? err : sys_ret;
+}
+
+/*
+ * Task size is the maximum virtual address space size that a process can occupy in the memory
+ * Refer to linux kernel arch/riscv/include/asm/pgtable.h,
+ * task size is:
+ * -        0x9fc00000	(~2.5GB) for RV32.
+ * -      0x4000000000	( 256GB) for RV64 using SV39 mmu
+ * -    0x800000000000	( 128TB) for RV64 using SV48 mmu
+ * - 0x100000000000000	(  64PB) for RV64 using SV57 mmu
+ */
+#define TASK_SIZE_MIN (1UL << 38)
+#define TASK_SIZE_MAX (1UL << 56)
+
+unsigned long compel_task_size(void)
+{
+	unsigned long limit = TASK_SIZE_MIN;
+
+	/* Double the candidate until munmap() rejects it or TASK_SIZE_MAX is reached */
+	while (limit < TASK_SIZE_MAX && !munmap((void *)limit, page_size()))
+		limit <<= 1;
+	return limit;
+}
+
+/*
+ * Get task registers via the NT_PRSTATUS regset (overrides the weak default)
+ */
+int ptrace_get_regs(int pid, user_regs_struct_t *regs)
+{
+	struct iovec iov = { .iov_base = regs, .iov_len = sizeof(*regs) };
+	long rc;
+
+	rc = ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov);
+	return rc; /* ptrace() result handed back as-is (narrowed to int) */
+}
+
+/*
+ * Set task registers via the NT_PRSTATUS regset (overrides the weak default)
+ */
+int ptrace_set_regs(int pid, user_regs_struct_t *regs)
+{
+	struct iovec iov = { .iov_base = regs, .iov_len = sizeof(*regs) };
+	long rc;
+
+	rc = ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov);
+	return rc; /* ptrace() result handed back as-is (narrowed to int) */
+}
diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
index 6a349e1cb..ff2f33006 100644
--- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
+++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
@@ -82,7 +82,7 @@ __NR_sys_timer_settime	255		sys_timer_settime	(kernel_timer_t timer_id, int flag
 __NR_sys_timer_gettime	256		sys_timer_gettime	(int timer_id, const struct itimerspec *setting)
 __NR_sys_timer_getoverrun	257		sys_timer_getoverrun	(int timer_id)
 __NR_sys_timer_delete	258		sys_timer_delete	(kernel_timer_t timer_id)
-__NR_clock_gettime	260		sys_clock_gettime	(const clockid_t which_clock, const struct timespec *tp)
+__NR_clock_gettime	260		sys_clock_gettime	(clockid_t which_clock, struct timespec *tp)
 __NR_exit_group		248		sys_exit_group		(int error_code)
 __NR_waitid		281		sys_waitid		(int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
 __NR_set_robust_list	304		sys_set_robust_list	(struct robust_list_head *head, size_t len)
@@ -114,7 +114,9 @@ __NR_fsopen		430		sys_fsopen		(char *fsname, unsigned int flags)
 __NR_fsconfig		431		sys_fsconfig		(int fd, unsigned int cmd, const char *key, const char *value, int aux)
 __NR_fsmount		432		sys_fsmount		(int fd, unsigned int flags, unsigned int attr_flags)
 __NR_clone3		435		sys_clone3		(struct clone_args *uargs, size_t size)
+__NR_close_range	436		sys_close_range		(unsigned int fd, unsigned int max_fd, unsigned int flags)
 __NR_pidfd_open		434		sys_pidfd_open		(pid_t pid, unsigned int flags)
 __NR_openat2		437		sys_openat2		(int dirfd, char *pathname, struct open_how *how, size_t size)
 __NR_pidfd_getfd	438		sys_pidfd_getfd		(int pidfd, int targetfd, unsigned int flags)
 __NR_rseq       	383		sys_rseq		(void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+__NR_membarrier 	356		sys_membarrier		(int cmd, unsigned int flags, int cpu_id)
diff --git a/compel/arch/s390/src/lib/infect.c b/compel/arch/s390/src/lib/infect.c
index 3cd25e71d..a77b38917 100644
--- a/compel/arch/s390/src/lib/infect.c
+++ b/compel/arch/s390/src/lib/infect.c
@@ -293,10 +293,9 @@ static int s390_disable_ri_bit(pid_t pid, user_regs_struct_t *regs)
 /*
  * Prepare task registers for restart
  */
-int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
+int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs, save_regs_t save,
 			 void *arg, __maybe_unused unsigned long flags)
 {
-	user_fpregs_struct_t tmp, *fpregs = ext_regs ? ext_regs : &tmp;
 	struct iovec iov;
 	int rewind;
 
@@ -349,7 +348,7 @@ int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct
 		}
 	}
 	/* Call save_task_regs() */
-	return save(arg, regs, fpregs);
+	return save(pid, arg, regs, fpregs);
 }
 
 int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
diff --git a/compel/arch/x86/plugins/std/parasite-head.S b/compel/arch/x86/plugins/std/parasite-head.S
index 4fb38d1f1..42cad4808 100644
--- a/compel/arch/x86/plugins/std/parasite-head.S
+++ b/compel/arch/x86/plugins/std/parasite-head.S
@@ -34,7 +34,21 @@ END(__export_parasite_head_start_compat)
 .code64
 #endif
 
+/*
+ * When parasite_service() runs in the daemon mode it will return the stack
+ * pointer for the sigreturn frame in %rax and we call sigreturn directly
+ * from here.
+ * Since a valid stack pointer is positive, it is safe to presume that
+ * return value <= 0 means that parasite_service() called parasite_trap_cmd()
+ * in non-daemon mode, and the parasite should stop at int3.
+ */
 ENTRY(__export_parasite_head_start)
 	call	parasite_service
+	cmp	$0, %rax
+	jle	1f
+	movq	%rax, %rsp
+	movq	$15, %rax
+	syscall
+1:
 	int	$0x03
 END(__export_parasite_head_start)
diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
index a119a59b2..cc23dc3f3 100644
--- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
+++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
@@ -102,7 +102,9 @@ __NR_fsopen		430		sys_fsopen		(char *fsname, unsigned int flags)
 __NR_fsconfig		431		sys_fsconfig		(int fd, unsigned int cmd, const char *key, const char *value, int aux)
 __NR_fsmount		432		sys_fsmount		(int fd, unsigned int flags, unsigned int attr_flags)
 __NR_clone3		435		sys_clone3		(struct clone_args *uargs, size_t size)
+__NR_close_range	436		sys_close_range		(unsigned int fd, unsigned int max_fd, unsigned int flags)
 __NR_pidfd_open		434		sys_pidfd_open		(pid_t pid, unsigned int flags)
 __NR_openat2		437		sys_openat2		(int dirfd, char *pathname, struct open_how *how, size_t size)
 __NR_pidfd_getfd	438		sys_pidfd_getfd		(int pidfd, int targetfd, unsigned int flags)
 __NR_rseq       	386		sys_rseq		(void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+__NR_membarrier 	375		sys_membarrier		(int cmd, unsigned int flags, int cpu_id)
diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
index 16dd86e79..8c3620c2a 100644
--- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
+++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
@@ -85,7 +85,7 @@ __NR_sys_timer_settime		223		sys_timer_settime	(kernel_timer_t timer_id, int fla
 __NR_sys_timer_gettime		224		sys_timer_gettime	(int timer_id, const struct itimerspec *setting)
 __NR_sys_timer_getoverrun	225		sys_timer_getoverrun	(int timer_id)
 __NR_sys_timer_delete		226		sys_timer_delete	(kernel_timer_t timer_id)
-__NR_clock_gettime		228		sys_clock_gettime	(const clockid_t which_clock, const struct timespec *tp)
+__NR_clock_gettime		228		sys_clock_gettime	(clockid_t which_clock, struct timespec *tp)
 __NR_exit_group			231		sys_exit_group		(int error_code)
 __NR_openat			257		sys_openat		(int dfd, const char *filename, int flags, int mode)
 __NR_waitid			247		sys_waitid		(int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
@@ -113,7 +113,10 @@ __NR_fsopen			430		sys_fsopen		(char *fsname, unsigned int flags)
 __NR_fsconfig			431		sys_fsconfig		(int fd, unsigned int cmd, const char *key, const char *value, int aux)
 __NR_fsmount			432		sys_fsmount		(int fd, unsigned int flags, unsigned int attr_flags)
 __NR_clone3			435		sys_clone3		(struct clone_args *uargs, size_t size)
+__NR_close_range		436		sys_close_range		(unsigned int fd, unsigned int max_fd, unsigned int flags)
 __NR_pidfd_open			434		sys_pidfd_open		(pid_t pid, unsigned int flags)
 __NR_openat2		437		sys_openat2		(int dirfd, char *pathname, struct open_how *how, size_t size)
 __NR_pidfd_getfd		438		sys_pidfd_getfd		(int pidfd, int targetfd, unsigned int flags)
 __NR_rseq       		334		sys_rseq		(void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+__NR_membarrier 		324		sys_membarrier		(int cmd, unsigned int flags, int cpu_id)
+__NR_map_shadow_stack		453		sys_map_shadow_stack	(unsigned long addr, unsigned long size, unsigned int flags)
diff --git a/compel/arch/x86/src/lib/include/uapi/asm/cpu.h b/compel/arch/x86/src/lib/include/uapi/asm/cpu.h
index 63ff83dbe..11c50e0e5 100644
--- a/compel/arch/x86/src/lib/include/uapi/asm/cpu.h
+++ b/compel/arch/x86/src/lib/include/uapi/asm/cpu.h
@@ -244,6 +244,7 @@ enum cpuid_leafs {
 #define X86_FEATURE_PKU		     (11 * 32 + 3)  /* Protection Keys for Userspace */
 #define X86_FEATURE_OSPKE	     (11 * 32 + 4)  /* OS Protection Keys Enable */
 #define X86_FEATURE_AVX512_VBMI2     (11 * 32 + 6)  /* Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_SHSTK            (11 * 32 + 7)  /* Shadow Stack */
 #define X86_FEATURE_GFNI	     (11 * 32 + 8)  /* Galois Field New Instructions */
 #define X86_FEATURE_VAES	     (11 * 32 + 9)  /* Vector AES */
 #define X86_FEATURE_VPCLMULQDQ	     (11 * 32 + 10) /* Carry-Less Multiplication Double Quadword */
diff --git a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h
index bd3b0cbd5..d595a68fc 100644
--- a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h
+++ b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h
@@ -21,7 +21,28 @@
 #define XSTATE_YMM 0x4
 
 #define FXSAVE_SIZE 512
-#define XSAVE_SIZE  4096
+/*
+ * This used to be 4096 (one page). There is a comment below concerning
+ * this size:
+ *  "One page should be enough for the whole xsave state ;-)"
+ * Which is kind of funny as it is no longer enough ;-)
+ *
+ * Older CPUs:
+ * # cpuid -1 -l 0xd -s 0
+ * ...
+ *     bytes required by XSAVE/XRSTOR area     = 0x00000988 (2440)
+ *
+ * Newer CPUs (Sapphire Rapids):
+ * # cpuid -1 -l 0xd -s 0
+ * ...
+ *     bytes required by XSAVE/XRSTOR area     = 0x00002b00 (11008)
+ *
+ * So one page is no longer enough... But:
+ *
+ * Four pages should be enough for the whole xsave state ;-)
+ */
+
+#define XSAVE_SIZE  (4 * 4096) /* four pages; parenthesized so it expands safely inside larger expressions */
 
 #define XSAVE_HDR_SIZE	 64
 #define XSAVE_HDR_OFFSET FXSAVE_SIZE
@@ -224,6 +245,14 @@ struct pkru_state {
 	uint32_t pad;
 } __packed;
 
+/*
+ * State component 11 is Control-flow Enforcement user states
+ */
+struct cet_user_state {
+	uint64_t cet;			/* user control-flow settings */
+	uint64_t ssp;			/* user shadow stack pointer */
+};
+
 /*
  * This is our most modern FPU state format, as saved by the XSAVE
  * and restored by the XRSTOR instructions.
@@ -235,8 +264,11 @@ struct pkru_state {
  *
  *
  * One page should be enough for the whole xsave state ;-)
+ *
+ * Of course it was not ;-) Now using four pages...
+ *
  */
-#define EXTENDED_STATE_AREA_SIZE (4096 - sizeof(struct i387_fxsave_struct) - sizeof(struct xsave_hdr_struct))
+#define EXTENDED_STATE_AREA_SIZE (XSAVE_SIZE - sizeof(struct i387_fxsave_struct) - sizeof(struct xsave_hdr_struct) - sizeof(struct cet_user_state))
 
 /*
  * cpu requires it to be 64 byte aligned
@@ -252,6 +284,7 @@ struct xsave_struct {
 		struct ymmh_struct ymmh;
 		uint8_t extended_state_area[EXTENDED_STATE_AREA_SIZE];
 	};
+	struct cet_user_state cet;
 } __aligned(FP_MIN_ALIGN_BYTES) __packed;
 
 struct xsave_struct_ia32 {
diff --git a/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h b/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h
index b35504ff8..b998c488c 100644
--- a/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h
+++ b/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h
@@ -143,4 +143,11 @@ typedef struct xsave_struct user_fpregs_struct_t;
  */
 #define __NR32_mmap __NR32_mmap2
 
+extern bool __compel_shstk_enabled(user_fpregs_struct_t *ext_regs);
+#define compel_shstk_enabled __compel_shstk_enabled
+
+extern int __parasite_setup_shstk(struct parasite_ctl *ctl,
+				user_fpregs_struct_t *ext_regs);
+#define parasite_setup_shstk __parasite_setup_shstk
+
 #endif /* UAPI_COMPEL_ASM_TYPES_H__ */
diff --git a/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h b/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h
index ec8c156fa..4a2e67559 100644
--- a/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h
+++ b/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h
@@ -177,6 +177,24 @@ static inline void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe)
 #define USER32_CS 0x23
 
 /* clang-format off */
+/*
+ * rst_sigreturn in the restorer is a non-inlined call, which adds an entry
+ * to the shadow stack above the sigframe token.
+ * If the shadow stack is enabled, increment the shadow stack pointer to
+ * remove that entry.
+ */
+#define ARCH_SHSTK_POP()						\
+	asm volatile(							\
+		     "xor %%rax, %%rax\n"				\
+		     "rdsspq %%rax\n"					\
+		     "cmpq $0, %%rax\n"					\
+		     "jz 1f\n"						\
+		     "movq $1, %%rax\n"					\
+		     "incsspq %%rax\n"					\
+		     "1:\n"						\
+		     : :						\
+		     : "rax")
+
 #define ARCH_RT_SIGRETURN_NATIVE(new_sp)				\
 	asm volatile(							\
 		     "movq %0, %%rax				    \n"	\
@@ -203,10 +221,19 @@ static inline void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe)
 		: "rdi"(new_sp)						\
 		: "eax", "r8", "r9", "r10", "r11", "memory")
 
-#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe)				\
+#define ARCH_RT_SIGRETURN_RST(new_sp, rt_sigframe)			\
+do {									\
+	if ((rt_sigframe)->is_native) {					\
+		ARCH_SHSTK_POP();					\
+		ARCH_RT_SIGRETURN_NATIVE(new_sp);			\
+	} else								\
+		ARCH_RT_SIGRETURN_COMPAT(new_sp);			\
+} while (0)
+
+#define ARCH_RT_SIGRETURN_DUMP(new_sp, rt_sigframe)			\
 do {									\
 	if ((rt_sigframe)->is_native)					\
-		ARCH_RT_SIGRETURN_NATIVE(new_sp);			\
+		return new_sp;						\
 	else								\
 		ARCH_RT_SIGRETURN_COMPAT(new_sp);			\
 } while (0)
diff --git a/compel/arch/x86/src/lib/infect.c b/compel/arch/x86/src/lib/infect.c
index 98e2512e7..afcf2c53b 100644
--- a/compel/arch/x86/src/lib/infect.c
+++ b/compel/arch/x86/src/lib/infect.c
@@ -26,6 +26,16 @@
 #ifndef NT_X86_XSTATE
 #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
 #endif
+
+#ifndef NT_X86_SHSTK
+#define NT_X86_SHSTK 0x204	/* x86 shstk state */
+#endif
+
+#ifndef ARCH_SHSTK_STATUS
+#define ARCH_SHSTK_STATUS	0x5005
+#define ARCH_SHSTK_SHSTK	(1ULL << 0)
+#endif
+
 #ifndef NT_PRSTATUS
 #define NT_PRSTATUS 1 /* Contains copy of prstatus struct */
 #endif
@@ -220,6 +230,16 @@ int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigfr
 #define get_signed_user_reg(pregs, name) \
 	((user_regs_native(pregs)) ? (int64_t)((pregs)->native.name) : (int32_t)((pregs)->compat.name))
 
+static int get_task_fpregs(pid_t pid, user_fpregs_struct_t *xsave)
+{
+	/* Fetch the FPU registers with the old PTRACE_GETFPREGS request */
+	if (ptrace(PTRACE_GETFPREGS, pid, NULL, xsave) == 0)
+		return 0;
+
+	pr_perror("Can't obtain FPU registers for %d", pid);
+	return -1;
+}
+
 static int get_task_xsave(pid_t pid, user_fpregs_struct_t *xsave)
 {
 	struct iovec iov;
@@ -232,19 +252,75 @@ static int get_task_xsave(pid_t pid, user_fpregs_struct_t *xsave)
 		return -1;
 	}
 
-	return 0;
-}
+	if ((xsave->xsave_hdr.xstate_bv & 3) != 3) {
+		// Due to init-optimisation [1] x87 FPU or SSE state may not be filled in.
+		// Since those are restored unconditionally, make sure the init values are
+		// filled by retrying with old PTRACE_GETFPREGS.
+		//
+		// [1] Intel® 64 and IA-32 Architectures Software Developer's
+		//     Manual Volume 1: Basic Architecture
+		//     Section 13.6: Processor tracking of XSAVE-managed state
+		if (get_task_fpregs(pid, xsave))
+			return -1;
+	}
 
-static int get_task_fpregs(pid_t pid, user_fpregs_struct_t *xsave)
-{
-	if (ptrace(PTRACE_GETFPREGS, pid, NULL, xsave)) {
-		pr_perror("Can't obtain FPU registers for %d", pid);
-		return -1;
+	/*
+	 * xsave may be on stack, if we don't clear it explicitly we get
+	 * funky shadow stack state
+	 */
+	memset(&xsave->cet, 0, sizeof(xsave->cet));
+	if (compel_cpu_has_feature(X86_FEATURE_SHSTK)) {
+		unsigned long ssp = 0;
+		unsigned long features = 0;
+
+		if (ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long)&features, ARCH_SHSTK_STATUS)) {
+			/*
+			 * kernels that don't support shadow stack return
+			 * -EINVAL
+			 */
+			if (errno == EINVAL)
+				return 0;
+
+			pr_perror("shstk: can't get shadow stack status for %d", pid);
+			return -1;
+		}
+
+		if (!(features & ARCH_SHSTK_SHSTK))
+			return 0;
+
+		iov.iov_base = &ssp;
+		iov.iov_len = sizeof(ssp);
+
+		if (ptrace(PTRACE_GETREGSET, pid, (unsigned int)NT_X86_SHSTK, &iov) < 0) {
+			/* ENODEV means CET is not supported by the CPU  */
+			if (errno != ENODEV) {
+				pr_perror("shstk: can't get SSP for %d", pid);
+				return -1;
+			}
+		}
+
+		xsave->cet.cet = features;
+		xsave->cet.ssp = ssp;
+
+		pr_debug("%d: shstk: cet: %lx ssp: %lx\n", pid, xsave->cet.cet, xsave->cet.ssp);
 	}
 
 	return 0;
 }
 
+static inline void fixup_mxcsr(struct xsave_struct *xsave)
+{
+	/*
+	 * At this point xsave->i387.mxcsr is filled with random garbage.
+	 * Make it valid by applying a mask that allows all features
+	 * except the denormals-are-zero feature bit.
+	 *
+	 * See also the fpu__init_system_mxcsr function:
+	 * https://github.com/torvalds/linux/blob/8cb1ae19/arch/x86/kernel/fpu/init.c#L117
+	 */
+	xsave->i387.mxcsr = xsave->i387.mxcsr & 0x0000ffbf;
+}
+
 /* See arch/x86/kernel/fpu/xstate.c */
 static void validate_random_xstate(struct xsave_struct *xsave)
 {
@@ -272,17 +348,6 @@ static void validate_random_xstate(struct xsave_struct *xsave)
 
 	/* No reserved bits may be set */
 	memset(&hdr->reserved, 0, sizeof(hdr->reserved));
-
-	/*
-	 * While using PTRACE_SETREGSET the kernel checks that
-	 * "Reserved bits in MXCSR must be zero."
-	 * if (mxcsr[0] & ~mxcsr_feature_mask)
-	 *	return -EINVAL;
-	 *
-	 * As the mxcsr_feature_mask depends on the CPU the easiest solution for
-	 * this error injection test is to set mxcsr just to zero.
-	 */
-	xsave->i387.mxcsr = 0;
 }
 
 /*
@@ -309,6 +374,8 @@ static int corrupt_extregs(pid_t pid)
 	 */
 	pr_err("Corrupting %s for %d, seed %u\n", use_xsave ? "xsave" : "fpuregs", pid, init_seed);
 
+	fixup_mxcsr(&ext_regs);
+
 	if (!use_xsave) {
 		if (ptrace(PTRACE_SETFPREGS, pid, NULL, &ext_regs)) {
 			pr_perror("Can't set FPU registers for %d", pid);
@@ -330,10 +397,9 @@ static int corrupt_extregs(pid_t pid)
 	return 0;
 }
 
-int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
+int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *xs, save_regs_t save,
 			 void *arg, unsigned long flags)
 {
-	user_fpregs_struct_t xsave = {}, *xs = ext_regs ? ext_regs : &xsave;
 	int ret = -1;
 
 	pr_info("Dumping general registers for %d in %s mode\n", pid, user_regs_native(regs) ? "native" : "compat");
@@ -387,7 +453,7 @@ int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct
 		goto err;
 
 out:
-	ret = save(arg, regs, xs);
+	ret = save(pid, arg, regs, xs);
 err:
 	return ret;
 }
@@ -584,6 +650,7 @@ int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s)
 
 int ptrace_set_breakpoint(pid_t pid, void *addr)
 {
+	k_rtsigset_t block;
 	int ret;
 
 	/* Set a breakpoint */
@@ -599,6 +666,16 @@ int ptrace_set_breakpoint(pid_t pid, void *addr)
 		return -1;
 	}
 
+	/*
+	 * FIXME(issues/1429): SIGTRAP can't be blocked, otherwise its handler
+	 * will be reset to the default one.
+	 */
+	ksigfillset(&block);
+	ksigdelset(&block, SIGTRAP);
+	if (ptrace(PTRACE_SETSIGMASK, pid, sizeof(k_rtsigset_t), &block)) {
+		pr_perror("Can't block signals for %d", pid);
+		return -1;
+	}
 	ret = ptrace(PTRACE_CONT, pid, NULL, NULL);
 	if (ret) {
 		pr_perror("Unable to restart the  stopped tracee process %d", pid);
@@ -672,3 +749,59 @@ unsigned long compel_task_size(void)
 {
 	return TASK_SIZE;
 }
+
+bool __compel_shstk_enabled(user_fpregs_struct_t *ext_regs)
+{
+	/*
+	 * Shadow stack counts as enabled only when the CPU advertises the
+	 * feature and the SHSTK bit is set in the saved CET state.
+	 */
+	bool cpu_ok = compel_cpu_has_feature(X86_FEATURE_SHSTK);
+
+	return cpu_ok && (ext_regs->cet.cet & ARCH_SHSTK_SHSTK) != 0;
+}
+
+int parasite_setup_shstk(struct parasite_ctl *ctl, __maybe_unused user_fpregs_struct_t *ext_regs)
+{
+	/* Inject a shadow stack token and the restorer address (ctl->parasite_ip) below the tracee's SSP */
+	pid_t pid = ctl->rpid;
+	unsigned long token, sa_restorer = ctl->parasite_ip;
+	unsigned long long ssp = 0;
+	struct iovec iov;
+
+	if (!compel_shstk_enabled(ext_regs))
+		return 0;
+
+	iov.iov_base = &ssp;
+	iov.iov_len = sizeof(ssp);
+	if (ptrace(PTRACE_GETREGSET, pid, (unsigned int)NT_X86_SHSTK, &iov) < 0) {
+		/* ENODEV means CET is not supported by the CPU: ssp was not read, so there is nothing to set up */
+		if (errno == ENODEV)
+			return 0;
+		pr_perror("shstk: %d: cannot get SSP", pid);
+		return -1;
+	}
+
+	/* The token is for 64-bit */
+	token = ALIGN_DOWN(ssp, 8);
+	token |= (1UL << 63);
+	ssp = ALIGN_DOWN(ssp, 8) - 8;
+	if (ptrace(PTRACE_POKEDATA, pid, (void *)ssp, token)) {
+		pr_perror("shstk: %d: failed to inject shadow stack token", pid);
+		return -1;
+	}
+
+	ssp = ssp - sizeof(uint64_t); /* the slot below the token receives the restorer address */
+	if (ptrace(PTRACE_POKEDATA, pid, (void *)ssp, sa_restorer)) {
+		pr_perror("shstk: %d: failed to inject restorer address", pid);
+		return -1;
+	}
+
+	ssp = ssp + sizeof(uint64_t); /* the SSP written back (iov still refers to ssp) points at the token */
+	if (ptrace(PTRACE_SETREGSET, pid, (unsigned int)NT_X86_SHSTK, &iov) < 0) {
+		pr_perror("shstk: %d: cannot write SSP", pid);
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/compel/include/infect-priv.h b/compel/include/infect-priv.h
index 9d3442839..8e78a7f6c 100644
--- a/compel/include/infect-priv.h
+++ b/compel/include/infect-priv.h
@@ -72,6 +72,7 @@ extern bool arch_can_dump_task(struct parasite_ctl *ctl);
 extern int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
 				void *arg, unsigned long flags);
 extern int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs);
+extern int compel_set_task_gcs_regs(pid_t pid, user_fpregs_struct_t *ext_regs);
 extern int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s);
 extern int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs,
 				     user_fpregs_struct_t *fpregs);
diff --git a/compel/include/log.h b/compel/include/log.h
index 0e33976b1..5250622c8 100644
--- a/compel/include/log.h
+++ b/compel/include/log.h
@@ -1,6 +1,9 @@
 #ifndef COMPEL_LOG_H__
 #define COMPEL_LOG_H__
 
+#include <errno.h>
+#include <string.h>
+
 #include "uapi/compel/log.h"
 
 #ifndef LOG_PREFIX
@@ -45,6 +48,6 @@ extern void compel_print_on_level(unsigned int loglevel, const char *format, ...
 
 #define pr_debug(fmt, ...) compel_print_on_level(COMPEL_LOG_DEBUG, LOG_PREFIX fmt, ##__VA_ARGS__)
 
-#define pr_perror(fmt, ...) pr_err(fmt ": %m\n", ##__VA_ARGS__)
+#define pr_perror(fmt, ...) pr_err(fmt ": %s\n", ##__VA_ARGS__, strerror(errno))
 
 #endif /* COMPEL_LOG_H__ */
diff --git a/compel/include/ptrace.h b/compel/include/ptrace.h
index bf2701e63..00013f937 100644
--- a/compel/include/ptrace.h
+++ b/compel/include/ptrace.h
@@ -5,6 +5,8 @@
 #include <compel/asm/infect-types.h>
 #include <compel/ptrace.h>
 
+#define PTRACE_SYSCALL_TRAP 0x80
+
 #define PTRACE_SI_EVENT(_si_code) (((_si_code)&0xFFFF) >> 8)
 
 extern int ptrace_get_regs(pid_t pid, user_regs_struct_t *regs);
diff --git a/compel/include/uapi/infect-util.h b/compel/include/uapi/infect-util.h
index ace6f6b6b..658df9393 100644
--- a/compel/include/uapi/infect-util.h
+++ b/compel/include/uapi/infect-util.h
@@ -3,11 +3,20 @@
 
 #include "common/compiler.h"
 
+/**
+ * The length of the hash is based on what libuuid provides.
+ * According to the manpage this is:
+ *
+ * The uuid_unparse() function converts the supplied UUID uu from the binary
+ * representation into a 36-byte string (plus trailing '\0')
+ */
+#define RUN_ID_HASH_LENGTH 37
+
 /*
  * compel_run_id is a unique value of the current run. It can be used to
  * generate resource ID-s to avoid conflicts with other processes.
  */
-extern uint64_t compel_run_id;
+extern char compel_run_id[RUN_ID_HASH_LENGTH];
 
 struct parasite_ctl;
 extern int __must_check compel_util_send_fd(struct parasite_ctl *ctl, int fd);
diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h
index 3040a67a7..d21c261b7 100644
--- a/compel/include/uapi/infect.h
+++ b/compel/include/uapi/infect.h
@@ -13,11 +13,21 @@
 
 #define PARASITE_START_AREA_MIN (4096)
 
+#define PARASITE_STACK_SIZE (16 << 10)
+/*
+ * A stack redzone is a small, protected region of memory located immediately
+ * after a parasite stack. It is intended to remain unchanged. While it can be
+ * implemented as a guard page, we want to avoid the overhead of additional
+ * remote system calls.
+ */
+#define PARASITE_STACK_REDZONE 128
+
 extern int __must_check compel_interrupt_task(int pid);
 
 struct seize_task_status {
 	unsigned long long sigpnd;
 	unsigned long long shdpnd;
+	unsigned long long sigblk;
 	char state;
 	int vpid;
 	int ppid;
@@ -30,7 +40,9 @@ extern int __must_check compel_wait_task(int pid, int ppid,
 					 struct seize_task_status *st, void *data);
 
 extern int __must_check compel_stop_task(int pid);
+extern int __must_check compel_parse_stop_signo(int pid);
 extern int compel_resume_task(pid_t pid, int orig_state, int state);
+extern int compel_resume_task_sig(pid_t pid, int orig_state, int state, int stop_signo);
 
 struct parasite_ctl;
 struct parasite_thread_ctl;
@@ -38,9 +50,12 @@ struct parasite_thread_ctl;
 extern struct parasite_ctl __must_check *compel_prepare(int pid);
 extern struct parasite_ctl __must_check *compel_prepare_noctx(int pid);
 extern int __must_check compel_infect(struct parasite_ctl *ctl, unsigned long nr_threads, unsigned long args_size);
+extern int __must_check compel_infect_no_daemon(struct parasite_ctl *ctl, unsigned long nr_threads,
+						unsigned long args_size);
 extern struct parasite_thread_ctl __must_check *compel_prepare_thread(struct parasite_ctl *ctl, int pid);
 extern void compel_release_thread(struct parasite_thread_ctl *);
 
+extern int __must_check compel_start_daemon(struct parasite_ctl *ctl);
 extern int __must_check compel_stop_daemon(struct parasite_ctl *ctl);
 extern int __must_check compel_cure_remote(struct parasite_ctl *ctl);
 extern int __must_check compel_cure_local(struct parasite_ctl *ctl);
@@ -77,9 +92,9 @@ enum trace_flags {
 	TRACE_EXIT,
 };
 
-extern int __must_check compel_stop_on_syscall(int tasks, int sys_nr, int sys_nr_compat, enum trace_flags trace);
+extern int __must_check compel_stop_on_syscall(int tasks, int sys_nr, int sys_nr_compat);
 
-extern int __must_check compel_stop_pie(pid_t pid, void *addr, enum trace_flags *tf, bool no_bp);
+extern int __must_check compel_stop_pie(pid_t pid, void *addr, bool no_bp);
 
 extern int __must_check compel_unmap(struct parasite_ctl *ctl, unsigned long addr);
 
@@ -91,7 +106,7 @@ extern k_rtsigset_t *compel_thread_sigmask(struct parasite_thread_ctl *tctl);
 struct rt_sigframe;
 
 typedef int (*open_proc_fn)(int pid, int mode, const char *fmt, ...) __attribute__((__format__(__printf__, 3, 4)));
-typedef int (*save_regs_t)(void *, user_regs_struct_t *, user_fpregs_struct_t *);
+typedef int (*save_regs_t)(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *);
 typedef int (*make_sigframe_t)(void *, struct rt_sigframe *, struct rt_sigframe *, k_rtsigset_t *);
 
 struct infect_ctx {
@@ -114,6 +129,7 @@ struct infect_ctx {
 	open_proc_fn open_proc;
 
 	int log_fd; /* fd for parasite code to send messages to */
+	unsigned long remote_map_addr; /* User-specified address where to mmap parasitic code, default not set */
 };
 
 extern struct infect_ctx *compel_infect_ctx(struct parasite_ctl *);
@@ -174,4 +190,31 @@ extern uint64_t compel_get_thread_ip(struct parasite_thread_ctl *tctl);
 void compel_set_leader_ip(struct parasite_ctl *ctl, uint64_t v);
 void compel_set_thread_ip(struct parasite_thread_ctl *tctl, uint64_t v);
 
+extern void compel_get_stack(struct parasite_ctl *ctl, void **rstack, void **r_thread_stack);
+
+#ifndef compel_host_supports_gcs /* fallback, used only if not defined earlier */
+static inline bool compel_host_supports_gcs(void)
+{
+	return false; /* this stub unconditionally reports no GCS support */
+}
+#define compel_host_supports_gcs compel_host_supports_gcs /* must not expand to nothing */
+#endif
+
+#ifndef compel_shstk_enabled /* fallback, used only if not defined earlier */
+static inline bool compel_shstk_enabled(user_fpregs_struct_t *ext_regs)
+{
+	return false; /* this stub ignores ext_regs and always reports "not enabled" */
+}
+#define compel_shstk_enabled compel_shstk_enabled /* an empty define would turn calls into "(arg)" */
+#endif
+
+#ifndef parasite_setup_shstk /* fallback, used only if not defined earlier */
+static inline int parasite_setup_shstk(struct parasite_ctl *ctl,
+				       user_fpregs_struct_t *ext_regs)
+{
+	return 0; /* no-op stub: both arguments are ignored, always returns 0 */
+}
+#define parasite_setup_shstk parasite_setup_shstk
+#endif
+
 #endif
diff --git a/compel/include/uapi/ptrace.h b/compel/include/uapi/ptrace.h
index 63dfee97f..558124fbd 100644
--- a/compel/include/uapi/ptrace.h
+++ b/compel/include/uapi/ptrace.h
@@ -86,6 +86,19 @@ struct __ptrace_rseq_configuration {
 #define PTRACE_EVENT_STOP 128
 #endif
 
+/*
+ * Amazon Linux 2 uses glibc 2.26. PTRACE_ARCH_PRCTL was added in glibc 2.27.
+ * This allows CRIU to build on Amazon Linux 2.
+ *
+ * Note that in sys/ptrace.h, PTRACE_ARCH_PRCTL is an enum value so the
+ * preprocessor doesn't know about it. PT_ARCH_PRCTL is the preprocessor symbol
+ * that matches the value of PTRACE_ARCH_PRCTL. So look for PT_ARCH_PRCTL to
+ * decide if PTRACE_ARCH_PRCTL is available or not.
+ */
+#if defined(__x86_64__) && !defined(PT_ARCH_PRCTL)
+#define PTRACE_ARCH_PRCTL 30 /* From asm/ptrace-abi.h. */
+#endif
+
 extern int ptrace_suspend_seccomp(pid_t pid);
 
 extern int __must_check ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes);
diff --git a/compel/plugins/include/uapi/std/infect.h b/compel/plugins/include/uapi/std/infect.h
index 08a5a7a80..a729abbd2 100644
--- a/compel/plugins/include/uapi/std/infect.h
+++ b/compel/plugins/include/uapi/std/infect.h
@@ -7,7 +7,7 @@ extern int parasite_get_rpc_sock(void);
 
 extern unsigned int __export_parasite_service_cmd;
 extern void *__export_parasite_service_args_ptr;
-extern int __must_check parasite_service(void);
+extern unsigned long __must_check parasite_service(void);
 
 /*
  * Must be supplied by user plugins.
diff --git a/compel/plugins/std/infect.c b/compel/plugins/std/infect.c
index abecc140f..034201320 100644
--- a/compel/plugins/std/infect.c
+++ b/compel/plugins/std/infect.c
@@ -16,6 +16,10 @@
 
 #include "rpc-pie-priv.h"
 
+#ifndef ARCH_RT_SIGRETURN_DUMP
+#define ARCH_RT_SIGRETURN_DUMP ARCH_RT_SIGRETURN
+#endif
+
 static int tsock = -1;
 
 static struct rt_sigframe *sigframe;
@@ -27,7 +31,7 @@ static struct rt_sigframe *sigframe;
  */
 static unsigned __page_size;
 
-unsigned __attribute((weak)) page_size(void)
+unsigned long __attribute((weak)) page_size(void)
 {
 	return __page_size;
 }
@@ -79,12 +83,13 @@ static int __parasite_daemon_wait_msg(struct ctl_msg *m)
 
 /* Core infect code */
 
-static noinline void fini_sigreturn(unsigned long new_sp)
+static noinline unsigned long fini_sigreturn(unsigned long new_sp)
 {
-	ARCH_RT_SIGRETURN(new_sp, sigframe);
+	ARCH_RT_SIGRETURN_DUMP(new_sp, sigframe);
+	return new_sp;
 }
 
-static int fini(void)
+static unsigned long fini(void)
 {
 	unsigned long new_sp;
 
@@ -96,14 +101,14 @@ static int fini(void)
 	sys_close(tsock);
 	std_log_set_fd(-1);
 
-	fini_sigreturn(new_sp);
+	return fini_sigreturn(new_sp);
 
 	BUG();
 
 	return -1;
 }
 
-static noinline __used int noinline parasite_daemon(void *args)
+static noinline __used unsigned long parasite_daemon(void *args)
 {
 	struct ctl_msg m;
 	int ret = -1;
@@ -140,12 +145,10 @@ static noinline __used int noinline parasite_daemon(void *args)
 	}
 
 out:
-	fini();
-
-	return 0;
+	return fini();
 }
 
-static noinline __used int parasite_init_daemon(void *data)
+static noinline __used unsigned long parasite_init_daemon(void *data)
 {
 	struct parasite_init_args *args = data;
 	int ret;
@@ -178,14 +181,11 @@ static noinline __used int parasite_init_daemon(void *data)
 	} else
 		goto err;
 
-	parasite_daemon(data);
+	return parasite_daemon(data);
 
 err:
 	futex_set_and_wake(&args->daemon_connected, ret);
-	fini();
-	BUG();
-
-	return -1;
+	return fini();
 }
 
 #ifndef __parasite_entry
@@ -203,7 +203,7 @@ err:
 unsigned int __export_parasite_service_cmd = 0;
 void *__export_parasite_service_args_ptr = NULL;
 
-int __used __parasite_entry parasite_service(void)
+unsigned long __used __parasite_entry parasite_service(void)
 {
 	unsigned int cmd = __export_parasite_service_cmd;
 	void *args = __export_parasite_service_args_ptr;
diff --git a/compel/src/lib/infect-util.c b/compel/src/lib/infect-util.c
index 00a7c83f7..dc57e28f7 100644
--- a/compel/src/lib/infect-util.c
+++ b/compel/src/lib/infect-util.c
@@ -7,7 +7,7 @@
 #include "infect-rpc.h"
 #include "infect-util.h"
 
-uint64_t compel_run_id;
+char compel_run_id[RUN_ID_HASH_LENGTH];
 
 int compel_util_send_fd(struct parasite_ctl *ctl, int fd)
 {
diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c
index c78c02a6a..22fcf24fa 100644
--- a/compel/src/lib/infect.c
+++ b/compel/src/lib/infect.c
@@ -38,8 +38,6 @@
 #define UNIX_PATH_MAX (sizeof(struct sockaddr_un) - (size_t)((struct sockaddr_un *)0)->sun_path)
 #endif
 
-#define PARASITE_STACK_SIZE (16 << 10)
-
 #ifndef SECCOMP_MODE_DISABLED
 #define SECCOMP_MODE_DISABLED 0
 #endif
@@ -92,6 +90,12 @@ static int parse_pid_status(int pid, struct seize_task_status *ss, void *data)
 
 			continue;
 		}
+		if (!strncmp(aux, "SigBlk:", 7)) {
+			if (sscanf(aux + 7, "%llx", &ss->sigblk) != 1)
+				goto err_parse;
+
+			continue;
+		}
 	}
 
 	fclose(f);
@@ -186,6 +190,29 @@ static int skip_sigstop(int pid, int nr_signals)
 	return 0;
 }
 
+#define SIG_MASK(sig) (1ULL << ((sig)-1)) /* single bit for the 1-based signal number sig */
+
+#define SIG_IN_MASK(sig, mask) ((sig) > 0 && (sig) <= SIGMAX && (SIG_MASK(sig) & (mask)))
+
+#define SUPPORTED_STOP_MASK (SIG_MASK(SIGSTOP) | SIG_MASK(SIGTSTP)) /* stop signals handled here */
+
+static inline int sig_stop(int sig)
+{
+	return SIG_IN_MASK(sig, SUPPORTED_STOP_MASK); /* non-zero only for SIGSTOP or SIGTSTP */
+}
+
+/* Return si_signo reported by PTRACE_GETSIGINFO for @pid, or -1 (after logging) on failure. */
+int compel_parse_stop_signo(int pid)
+{
+	siginfo_t info;
+
+	if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &info) >= 0)
+		return info.si_signo;
+
+	pr_perror("SEIZE %d: can't parse stopped siginfo", pid);
+	return -1;
+}
+
 /*
  * This routine seizes task putting it into a special
  * state where we can manipulate the task via ptrace
@@ -198,7 +225,7 @@ int compel_wait_task(int pid, int ppid, int (*get_status)(int pid, struct seize_
 		     void *data)
 {
 	siginfo_t si;
-	int status, nr_sigstop;
+	int status, nr_stopsig;
 	int ret = 0, ret2, wait_errno = 0;
 
 	/*
@@ -275,6 +302,11 @@ try_again:
 		goto try_again;
 	}
 
+	if (ptrace(PTRACE_SETOPTIONS, pid, NULL, PTRACE_O_TRACESYSGOOD)) {
+		pr_perror("Unable to set PTRACE_O_TRACESYSGOOD for %d", pid);
+		goto err; /* detach on failure, like the other error paths in this function */
+	}
+
 	if (ss->seccomp_mode != SECCOMP_MODE_DISABLED && ptrace_suspend_seccomp(pid) < 0)
 		goto err;
 
@@ -291,17 +323,32 @@ try_again:
 		goto err;
 	}
 
-	nr_sigstop = 0;
-	if (ss->sigpnd & (1 << (SIGSTOP - 1)))
-		nr_sigstop++;
-	if (ss->shdpnd & (1 << (SIGSTOP - 1)))
-		nr_sigstop++;
-	if (si.si_signo == SIGSTOP)
-		nr_sigstop++;
+	nr_stopsig = 0;
+	if (SIG_IN_MASK(SIGSTOP, ss->sigpnd))
+		nr_stopsig++;
+	if (SIG_IN_MASK(SIGSTOP, ss->shdpnd))
+		nr_stopsig++;
 
-	if (nr_sigstop) {
-		if (skip_sigstop(pid, nr_sigstop))
-			goto err_stop;
+	if (SIG_IN_MASK(SIGTSTP, ss->sigpnd) && !SIG_IN_MASK(SIGTSTP, ss->sigblk))
+		nr_stopsig++;
+	if (SIG_IN_MASK(SIGTSTP, ss->shdpnd) && !SIG_IN_MASK(SIGTSTP, ss->sigblk))
+		nr_stopsig++;
+
+	if (sig_stop(si.si_signo))
+		nr_stopsig++;
+
+	if (nr_stopsig) {
+		if (skip_sigstop(pid, nr_stopsig)) {
+			/*
+			 * Make sure that the task is stopped by a supported stop signal and
+			 * send it again to restore task state before criu intervention.
+			 */
+			if (sig_stop(si.si_signo))
+				kill(pid, si.si_signo);
+			else
+				kill(pid, SIGSTOP);
+			goto err;
+		}
 
 		return COMPEL_TASK_STOPPED;
 	}
@@ -313,8 +360,6 @@ try_again:
 		goto err;
 	}
 
-err_stop:
-	kill(pid, SIGSTOP);
 err:
 	if (ptrace(PTRACE_DETACH, pid, NULL, NULL))
 		pr_perror("Unable to detach from %d", pid);
@@ -322,6 +367,11 @@ err:
 }
 
 int compel_resume_task(pid_t pid, int orig_st, int st)
+{
+	return compel_resume_task_sig(pid, orig_st, st, SIGSTOP);
+}
+
+int compel_resume_task_sig(pid_t pid, int orig_st, int st, int stop_signo)
 {
 	int ret = 0;
 
@@ -345,8 +395,18 @@ int compel_resume_task(pid_t pid, int orig_st, int st)
 		 * task with STOP in queue that would get lost after
 		 * detach, so stop it again.
 		 */
-		if (orig_st == COMPEL_TASK_STOPPED)
-			kill(pid, SIGSTOP);
+		if (orig_st == COMPEL_TASK_STOPPED) {
+			/*
+			 * Check that stop_signo contains a supported stop signal.
+			 * If it doesn't, send SIGSTOP instead. This matters when
+			 * COMPEL_TASK_STOPPED comes from an old image that was
+			 * written before stop_signo was supported.
+			 */
+			if (sig_stop(stop_signo))
+				kill(pid, stop_signo);
+			else
+				kill(pid, SIGSTOP);
+		}
 	} else {
 		pr_err("Unknown final state %d\n", st);
 		ret = -1;
@@ -365,7 +425,7 @@ static int gen_parasite_saddr(struct sockaddr_un *saddr, int key)
 	int sun_len;
 
 	saddr->sun_family = AF_UNIX;
-	snprintf(saddr->sun_path, UNIX_PATH_MAX, "X/crtools-pr-%d-%" PRIx64, key, compel_run_id);
+	snprintf(saddr->sun_path, UNIX_PATH_MAX, "X/crtools-pr-%d-%s", key, compel_run_id);
 
 	sun_len = SUN_LEN(saddr);
 	*saddr->sun_path = '\0';
@@ -527,7 +587,7 @@ static int parasite_trap(struct parasite_ctl *ctl, pid_t pid, user_regs_struct_t
 	}
 
 	if (!WIFSTOPPED(status)) {
-		pr_err("Task is still running (pid: %d)\n", pid);
+		pr_err("Task is still running (pid: %d, status: 0x%x)\n", pid, status);
 		goto err;
 	}
 
@@ -677,6 +737,7 @@ static int parasite_start_daemon(struct parasite_ctl *ctl)
 {
 	pid_t pid = ctl->rpid;
 	struct infect_ctx *ictx = &ctl->ictx;
+	user_fpregs_struct_t ext_regs;
 
 	/*
 	 * Get task registers before going daemon, since the
@@ -684,7 +745,7 @@ static int parasite_start_daemon(struct parasite_ctl *ctl)
 	 * while in daemon it is not such.
 	 */
 
-	if (compel_get_task_regs(pid, &ctl->orig.regs, NULL, ictx->save_regs, ictx->regs_arg, ictx->flags)) {
+	if (compel_get_task_regs(pid, &ctl->orig.regs, &ext_regs, ictx->save_regs, ictx->regs_arg, ictx->flags)) {
 		pr_err("Can't obtain regs for thread %d\n", pid);
 		return -1;
 	}
@@ -697,6 +758,9 @@ static int parasite_start_daemon(struct parasite_ctl *ctl)
 	if (ictx->make_sigframe(ictx->regs_arg, ctl->sigframe, ctl->rsigframe, &ctl->orig.sigmask))
 		return -1;
 
+	if (parasite_setup_shstk(ctl, &ext_regs))
+		return -1;
+
 	if (parasite_init_daemon(ctl))
 		return -1;
 
@@ -750,7 +814,7 @@ static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size,
 	uint8_t orig_code[MEMFD_FNAME_SZ] = MEMFD_FNAME;
 	pid_t pid = ctl->rpid;
 	long sret = -ENOSYS;
-	int ret, fd, lfd;
+	int ret, fd, lfd, remote_flags;
 
 	if (ctl->ictx.flags & INFECT_NO_MEMFD)
 		return 1;
@@ -794,7 +858,11 @@ static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size,
 		goto err_cure;
 	}
 
-	ctl->remote_map = remote_mmap(ctl, NULL, size, remote_prot, MAP_FILE | MAP_SHARED, fd, 0);
+	/* A non-zero remote_map_addr is passed as the mmap address together with MAP_FIXED_NOREPLACE. */
+	remote_flags = MAP_FILE | MAP_SHARED;
+	if (ctl->ictx.remote_map_addr)
+		remote_flags |= MAP_FIXED_NOREPLACE;
+	ctl->remote_map = remote_mmap(ctl, (void *)ctl->ictx.remote_map_addr, size, remote_prot, remote_flags, fd, 0);
 	if (!ctl->remote_map) {
 		pr_err("Can't rmap memfd for parasite blob\n");
 		goto err_curef;
@@ -905,7 +973,7 @@ static int compel_map_exchange(struct parasite_ctl *ctl, unsigned long size)
 	return ret;
 }
 
-int compel_infect(struct parasite_ctl *ctl, unsigned long nr_threads, unsigned long args_size)
+int compel_infect_no_daemon(struct parasite_ctl *ctl, unsigned long nr_threads, unsigned long args_size)
 {
 	int ret;
 	unsigned long p, map_exchange_size, parasite_size = 0;
@@ -986,6 +1054,16 @@ int compel_infect(struct parasite_ctl *ctl, unsigned long nr_threads, unsigned l
 
 	memcpy(ctl->local_map, ctl->pblob.hdr.mem, ctl->pblob.hdr.bsize);
 	compel_relocs_apply(ctl->local_map, ctl->remote_map, &ctl->pblob);
+	/*
+	 * Ensure the infected thread sees the updated code.
+	 *
+	 * On architectures like ARM64, the Data Cache (D-cache) and
+	 * Instruction Cache (I-cache) are not automatically coherent.
+	 * Modifications land in the D-cache, so we must flush (clean) the
+	 * D-cache to push changes to RAM to ensure the CPU fetches the updated
+	 * instructions.
+	 */
+	__builtin___clear_cache(ctl->local_map, ctl->local_map + ctl->pblob.hdr.bsize);
 
 	p = parasite_size;
 
@@ -994,7 +1072,7 @@ int compel_infect(struct parasite_ctl *ctl, unsigned long nr_threads, unsigned l
 
 	p += RESTORE_STACK_SIGFRAME;
 	p += PARASITE_STACK_SIZE;
-	ctl->rstack = ctl->remote_map + p;
+	ctl->rstack = ctl->remote_map + p - PARASITE_STACK_REDZONE;
 
 	/*
 	 * x86-64 ABI requires a 16 bytes aligned stack.
@@ -1008,7 +1086,7 @@ int compel_infect(struct parasite_ctl *ctl, unsigned long nr_threads, unsigned l
 
 	if (nr_threads > 1) {
 		p += PARASITE_STACK_SIZE;
-		ctl->r_thread_stack = ctl->remote_map + p;
+		ctl->r_thread_stack = ctl->remote_map + p - PARASITE_STACK_REDZONE;
 	}
 
 	ret = arch_fetch_sas(ctl, ctl->rsigframe);
@@ -1017,15 +1095,23 @@ int compel_infect(struct parasite_ctl *ctl, unsigned long nr_threads, unsigned l
 		goto err;
 	}
 
-	if (parasite_start_daemon(ctl))
-		goto err;
-
 	return 0;
 
 err:
 	return -1;
 }
 
+/* Run compel_infect_no_daemon() and then parasite_start_daemon(); 0 on success, -1 otherwise. */
+int compel_infect(struct parasite_ctl *ctl, unsigned long nr_threads, unsigned long args_size)
+{
+	/* Short-circuit: the daemon is started only when the first step returned 0. */
+	if (compel_infect_no_daemon(ctl, nr_threads, args_size) ||
+	    parasite_start_daemon(ctl))
+		return -1;
+
+	return 0;
+}
+
 struct parasite_thread_ctl *compel_prepare_thread(struct parasite_ctl *ctl, int pid)
 {
 	struct parasite_thread_ctl *tctl;
@@ -1222,7 +1308,7 @@ struct plain_regs_struct {
 	user_fpregs_struct_t fpregs;
 };
 
-static int save_regs_plain(void *to, user_regs_struct_t *r, user_fpregs_struct_t *f)
+static int save_regs_plain(pid_t pid, void *to, user_regs_struct_t *r, user_fpregs_struct_t *f)
 {
 	struct plain_regs_struct *prs = to;
 
@@ -1309,7 +1395,6 @@ static int parasite_fini_seized(struct parasite_ctl *ctl)
 	pid_t pid = ctl->rpid;
 	user_regs_struct_t regs;
 	int status, ret = 0;
-	enum trace_flags flag;
 
 	/* stop getting chld from parasite -- we're about to step-by-step it */
 	if (restore_child_handler(ctl))
@@ -1329,7 +1414,7 @@ static int parasite_fini_seized(struct parasite_ctl *ctl)
 
 	pr_debug("Daemon %d exited trapping\n", pid);
 	if (!WIFSTOPPED(status)) {
-		pr_err("Task is still running (pid: %d)\n", pid);
+		pr_err("Task is still running (pid: %d, status: 0x%x)\n", pid, status);
 		return -1;
 	}
 
@@ -1350,14 +1435,11 @@ static int parasite_fini_seized(struct parasite_ctl *ctl)
 		return -1;
 
 	/* Go to sigreturn as closer as we can */
-	ret = compel_stop_pie(pid, ctl->sigreturn_addr, &flag, ctl->ictx.flags & INFECT_NO_BREAKPOINTS);
+	ret = compel_stop_pie(pid, ctl->sigreturn_addr, ctl->ictx.flags & INFECT_NO_BREAKPOINTS);
 	if (ret < 0)
 		return ret;
 
-	if (compel_stop_on_syscall(1, __NR(rt_sigreturn, 0), __NR(rt_sigreturn, 1), flag))
-		return -1;
-
-	if (ptrace_flush_breakpoints(pid))
+	if (compel_stop_on_syscall(1, __NR(rt_sigreturn, 0), __NR(rt_sigreturn, 1)))
 		return -1;
 
 	/*
@@ -1369,6 +1451,11 @@ static int parasite_fini_seized(struct parasite_ctl *ctl)
 	return 0;
 }
 
+int compel_start_daemon(struct parasite_ctl *ctl)
+{
+	return parasite_start_daemon(ctl); /* exported wrapper: result of the static helper is passed through */
+}
+
 int compel_stop_daemon(struct parasite_ctl *ctl)
 {
 	if (ctl->daemonized) {
@@ -1489,7 +1576,7 @@ int compel_unmap(struct parasite_ctl *ctl, unsigned long addr)
 	if (ret)
 		goto err;
 
-	ret = compel_stop_on_syscall(1, __NR(munmap, 0), __NR(munmap, 1), TRACE_ENTER);
+	ret = compel_stop_on_syscall(1, __NR(munmap, 0), __NR(munmap, 1));
 
 	/*
 	 * Don't touch extended registers here: they were restored
@@ -1501,12 +1588,12 @@ err:
 	return ret;
 }
 
-int compel_stop_pie(pid_t pid, void *addr, enum trace_flags *tf, bool no_bp)
+int compel_stop_pie(pid_t pid, void *addr, bool no_bp)
 {
 	int ret;
 
 	if (no_bp) {
-		pr_debug("Force no-breakpoints restore\n");
+		pr_debug("Force no-breakpoints restore of %d\n", pid);
 		ret = 0;
 	} else
 		ret = ptrace_set_breakpoint(pid, addr);
@@ -1518,7 +1605,6 @@ int compel_stop_pie(pid_t pid, void *addr, enum trace_flags *tf, bool no_bp)
 		 * PIE will stop on a breakpoint, next
 		 * stop after that will be syscall enter.
 		 */
-		*tf = TRACE_EXIT;
 		return 0;
 	}
 
@@ -1531,14 +1617,12 @@ int compel_stop_pie(pid_t pid, void *addr, enum trace_flags *tf, bool no_bp)
 		pr_perror("Unable to restart the %d process", pid);
 		return -1;
 	}
-
-	*tf = TRACE_ENTER;
 	return 0;
 }
 
 static bool task_is_trapped(int status, pid_t pid)
 {
-	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
+	if (WIFSTOPPED(status) && (WSTOPSIG(status) & ~PTRACE_SYSCALL_TRAP) == SIGTRAP)
 		return true;
 
 	pr_err("Task %d is in unexpected state: %x\n", pid, status);
@@ -1572,15 +1656,13 @@ static inline int is_required_syscall(user_regs_struct_t *regs, pid_t pid, const
  * sys_nr - the required syscall number
  * sys_nr_compat - the required compatible syscall number
  */
-int compel_stop_on_syscall(int tasks, const int sys_nr, const int sys_nr_compat, enum trace_flags trace)
+int compel_stop_on_syscall(int tasks, const int sys_nr, const int sys_nr_compat)
 {
+	enum trace_flags trace = tasks > 1 ? TRACE_ALL : TRACE_ENTER;
 	user_regs_struct_t regs;
 	int status, ret;
 	pid_t pid;
 
-	if (tasks > 1)
-		trace = TRACE_ALL;
-
 	/* Stop all threads on the enter point in sys_rt_sigreturn */
 	while (tasks) {
 		pid = wait4(-1, &status, __WALL, NULL);
@@ -1594,6 +1676,18 @@ int compel_stop_on_syscall(int tasks, const int sys_nr, const int sys_nr_compat,
 
 		pr_debug("%d was trapped\n", pid);
 
+		if ((WSTOPSIG(status) & PTRACE_SYSCALL_TRAP) == 0) {
+			/*
+			 * On some platforms such as ARM64, it is impossible to
+			 * pass through a breakpoint, so let's clear it right
+			 * after it has been triggered.
+			 */
+			if (ptrace_flush_breakpoints(pid)) {
+				pr_err("Unable to clear breakpoints\n");
+				return -1;
+			}
+			goto goon;
+		}
 		if (trace == TRACE_EXIT) {
 			trace = TRACE_ENTER;
 			pr_debug("`- Expecting exit\n");
@@ -1707,3 +1801,11 @@ void compel_set_thread_ip(struct parasite_thread_ctl *tctl, uint64_t v)
 {
 	SET_REG_IP(tctl->th.regs, v);
 }
+
+void compel_get_stack(struct parasite_ctl *ctl, void **rstack, void **r_thread_stack)
+{
+	if (rstack != NULL) /* out-parameter is optional */
+		*rstack = ctl->rstack;
+	if (r_thread_stack != NULL) /* likewise optional */
+		*r_thread_stack = ctl->r_thread_stack;
+}
diff --git a/compel/src/lib/ptrace.c b/compel/src/lib/ptrace.c
index 49b685d70..717ee2839 100644
--- a/compel/src/lib/ptrace.c
+++ b/compel/src/lib/ptrace.c
@@ -23,7 +23,7 @@
 
 int ptrace_suspend_seccomp(pid_t pid)
 {
-	if (ptrace(PTRACE_SETOPTIONS, pid, NULL, PTRACE_O_SUSPEND_SECCOMP) < 0) {
+	if (ptrace(PTRACE_SETOPTIONS, pid, NULL, PTRACE_O_SUSPEND_SECCOMP | PTRACE_O_TRACESYSGOOD) < 0) {
 		pr_perror("suspending seccomp failed");
 		return -1;
 	}
diff --git a/compel/src/main.c b/compel/src/main.c
index 632354582..21e06d7dd 100644
--- a/compel/src/main.c
+++ b/compel/src/main.c
@@ -56,6 +56,13 @@ static const flags_t flags = {
 	.cflags = COMPEL_CFLAGS_PIE,
 #elif defined CONFIG_MIPS
 	.arch = "mips",
+	.cflags = COMPEL_CFLAGS_PIE,
+#elif defined CONFIG_LOONGARCH64
+	.arch = "loongarch64",
+	.cflags = COMPEL_CFLAGS_PIE,
+#elif defined CONFIG_RISCV64
+	.arch = "riscv64",
+	.cflags = COMPEL_CFLAGS_PIE,
 #else
 #error "CONFIG_<ARCH> not defined, or unsupported ARCH"
 #endif
diff --git a/compel/test/Makefile b/compel/test/Makefile
index 63fb76f80..f46a821ee 100644
--- a/compel/test/Makefile
+++ b/compel/test/Makefile
@@ -1,4 +1,4 @@
-all: fdspy infect rsys
+all: fdspy infect rsys stack
 
 fdspy:
 	$(Q) $(MAKE) -C fdspy
@@ -10,8 +10,12 @@ infect:
 	$(Q) $(MAKE) -C infect run
 .PHONY: infect
 
-
 rsys:
 	$(Q) $(MAKE) -C rsys
 	$(Q) $(MAKE) -C rsys run
 .PHONY: rsys
+
+stack:
+	$(Q) $(MAKE) -C stack
+	$(Q) $(MAKE) -C stack run
+.PHONY: stack
diff --git a/compel/test/fdspy/spy.c b/compel/test/fdspy/spy.c
index 7f20ea2a7..41de99e20 100644
--- a/compel/test/fdspy/spy.c
+++ b/compel/test/fdspy/spy.c
@@ -110,11 +110,11 @@ static int check_pipe_ends(int wfd, int rfd)
 	printf("Check pipe ends are connected\n");
 	if (write(wfd, "1", 2) != 2) {
 		fprintf(stderr, "write to pipe failed\n");
-		return -1;
+		return 0;
 	}
 	if (read(rfd, aux, sizeof(aux)) != sizeof(aux)) {
 		fprintf(stderr, "read from pipe failed\n");
-		return -1;
+		return 0;
 	}
 	if (aux[0] != '1' || aux[1] != '\0') {
 		fprintf(stderr, "Pipe connectivity lost\n");
diff --git a/compel/test/infect/Makefile b/compel/test/infect/Makefile
index bacfad962..85efa5fd9 100644
--- a/compel/test/infect/Makefile
+++ b/compel/test/infect/Makefile
@@ -3,6 +3,11 @@ CFLAGS	?= -O2 -g -Wall -Werror
 
 COMPEL		:= ../../../compel/compel-host
 
+ifeq ($(GCS_ENABLE),1)
+CFLAGS  += -mbranch-protection=standard -DGCS_TEST_ENABLE=1
+LDFLAGS += -z experimental-gcs=check
+endif
+
 all: victim spy
 
 run:
@@ -17,7 +22,7 @@ clean:
 	rm -f parasite.o
 
 victim: victim.c
-	$(CC) $(CFLAGS) -o $@ $^
+	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
 
 spy: spy.c parasite.h
 	$(CC) $(CFLAGS) $(shell $(COMPEL) includes) -o $@ $< $(shell $(COMPEL) --static libs)
diff --git a/compel/test/infect/spy.c b/compel/test/infect/spy.c
index e7273b446..143946941 100644
--- a/compel/test/infect/spy.c
+++ b/compel/test/infect/spy.c
@@ -94,15 +94,15 @@ static inline int chk(int fd, int val)
 	int v = 0;
 
 	if (read(fd, &v, sizeof(v)) != sizeof(v))
-		return 0;
+		return 1;
 
 	printf("%d, want %d\n", v, val);
-	return v == val;
+	return v != val;
 }
 
 int main(int argc, char **argv)
 {
-	int p_in[2], p_out[2], p_err[2], pid, i, pass = 1;
+	int p_in[2], p_out[2], p_err[2], pid, i, err = 0;
 
 	/*
 	 * Prepare IO-s and fork the victim binary
@@ -112,6 +112,9 @@ int main(int argc, char **argv)
 		return -1;
 	}
 
+#ifdef GCS_TEST_ENABLE
+	setenv("GLIBC_TUNABLES", "glibc.cpu.aarch64_gcs=1:glibc.cpu.aarch64_gcs_policy=2", 1);
+#endif
 	pid = vfork();
 	if (pid == 0) {
 		close(p_in[1]);
@@ -142,9 +145,11 @@ int main(int argc, char **argv)
 		return 1;
 
 	printf("Checking the victim alive\n");
-	pass = chk(p_out[0], 1);
-	pass = chk(p_out[0], 42);
-	if (!pass)
+	err = chk(p_out[0], 1);
+	if (err)
+		return 1;
+	err = chk(p_out[0], 42);
+	if (err)
 		return 1;
 
 	/*
@@ -176,14 +181,14 @@ int main(int argc, char **argv)
 	printf("Checking the result\n");
 
 	/* These two came from parasite */
-	pass = chk(p_out[0], 138);
-	pass = chk(p_out[0], 403);
+	err = chk(p_out[0], 138);
+	err |= chk(p_out[0], 403);
 
 	/* These two came from post-infect */
-	pass = chk(p_out[0], 1234);
-	pass = chk(p_out[0], 4096);
+	err |= chk(p_out[0], 1234);
+	err |= chk(p_out[0], 4096);
 
-	if (pass)
+	if (!err)
 		printf("All OK\n");
 	else
 		printf("Something went WRONG\n");
diff --git a/compel/test/stack/.gitignore b/compel/test/stack/.gitignore
new file mode 100644
index 000000000..0a554758d
--- /dev/null
+++ b/compel/test/stack/.gitignore
@@ -0,0 +1,4 @@
+parasite.h
+parasite.po
+spy
+victim
diff --git a/compel/test/stack/Makefile b/compel/test/stack/Makefile
new file mode 100644
index 000000000..bacfad962
--- /dev/null
+++ b/compel/test/stack/Makefile
@@ -0,0 +1,32 @@
+CC	:= gcc
+CFLAGS	?= -O2 -g -Wall -Werror
+
+COMPEL		:= ../../../compel/compel-host
+
+all: victim spy
+
+run:
+	./spy
+.PHONY: all run clean
+
+clean:
+	rm -f victim
+	rm -f spy
+	rm -f parasite.h
+	rm -f parasite.po
+	rm -f parasite.o
+
+victim: victim.c
+	$(CC) $(CFLAGS) -o $@ $^
+
+spy: spy.c parasite.h
+	$(CC) $(CFLAGS) $(shell $(COMPEL) includes) -o $@ $< $(shell $(COMPEL) --static libs)
+
+parasite.h: parasite.po
+	$(COMPEL) hgen -o $@ -f $<
+
+parasite.po: parasite.o
+	ld $(shell $(COMPEL) ldflags) -o $@ $^ $(shell $(COMPEL) plugins)
+
+parasite.o: parasite.c
+	$(CC) $(CFLAGS) -c $(shell $(COMPEL) cflags) -o $@ $^
diff --git a/compel/test/stack/parasite.c b/compel/test/stack/parasite.c
new file mode 100644
index 000000000..ad13bd25d
--- /dev/null
+++ b/compel/test/stack/parasite.c
@@ -0,0 +1,38 @@
+#include <errno.h>
+
+#include <compel/plugins/std.h>
+#include <infect-rpc.h>
+
+/*
+ * Stubs for std compel plugin.
+ */
+int parasite_trap_cmd(int cmd, void *args)
+{
+	return 0; /* stub: cmd and args are ignored, always returns 0 */
+}
+void parasite_cleanup(void)
+{
+}
+
+#define PARASITE_CMD_INC PARASITE_USER_CMDS
+#define PARASITE_CMD_DEC (PARASITE_USER_CMDS + 1) /* parenthesized so it stays one operand in any expression */
+
+/* Daemon command handler: replies on fd 1 with *args + 1, *args - 1, or -1. */
+int parasite_daemon_cmd(int cmd, void *args)
+{
+	int reply = -1; /* value sent for any unrecognized command */
+
+	/*
+	 * args is read as an int, and only for the two known commands;
+	 * unknown commands never dereference it.
+	 */
+	if (cmd == PARASITE_CMD_INC)
+		reply = *(int *)args + 1;
+	else if (cmd == (PARASITE_CMD_DEC))
+		reply = *(int *)args - 1;
+
+	/* The raw int is written out; the write result is not checked. */
+	sys_write(1, &reply, sizeof(int));
+
+	return 0;
+}
diff --git a/compel/test/stack/spy.c b/compel/test/stack/spy.c
new file mode 100644
index 000000000..184c8ab31
--- /dev/null
+++ b/compel/test/stack/spy.c
@@ -0,0 +1,294 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+
+#include <common/page.h>
+
+#include <compel/log.h>
+#include <compel/infect-rpc.h>
+#include <errno.h>
+
+#include "parasite.h"
+
+#define PARASITE_CMD_INC PARASITE_USER_CMDS
+#define PARASITE_CMD_DEC (PARASITE_USER_CMDS + 1) /* parenthesized so it expands safely inside larger expressions */
+
+#define err_and_ret(msg) /* print msg to stderr, then return -1 from the caller */ \
+	do {                                \
+		fprintf(stderr, "%s", msg); \
+		return -1;                  \
+	} while (0)
+
+void *saved_data = NULL;
+
+#define SAVED_DATA_MAX page_size()
+
+void cleanup_saved_data(void)
+{
+	free(saved_data); /* free(NULL) is a no-op, so this is safe when nothing was saved */
+}
+
+static void print_vmsg(unsigned int lvl, const char *fmt, va_list parms)
+{
+	printf("\tLC%u: ", lvl); /* prefix each message with a tab, "LC" and the lvl value */
+	vprintf(fmt, parms);
+}
+
+/* Return the lower of the two stack addresses from compel_get_stack(), ignoring a NULL thread stack. */
+static void *get_parasite_rstack_start(struct parasite_ctl *ctl)
+{
+	void *stack, *thread_stack;
+
+	compel_get_stack(ctl, &stack, &thread_stack);
+
+	if (thread_stack == NULL || thread_stack >= stack)
+		return stack;
+
+	return thread_stack;
+}
+
+/* Read len bytes at offset from /proc/<pid>/mem; returns a malloc()ed copy the caller frees, or NULL. */
+static void *read_proc_mem(int pid, void *offset, size_t len)
+{
+	char victim_mem_path[6 + 11 + 4 + 1]; /* "/proc/" + pid digits + "/mem" + NUL */
+	void *data = NULL;
+	ssize_t mem_read;
+	int written;
+	int fd;
+
+	written = snprintf(victim_mem_path, sizeof(victim_mem_path), "/proc/%d/mem", pid);
+	if (written < 0 || written >= sizeof(victim_mem_path)) {
+		fprintf(stderr, "Failed to create path string to victim's /proc/%d/mem file\n", pid);
+		return NULL;
+	}
+
+	fd = open(victim_mem_path, O_RDONLY);
+	if (fd < 0) {
+		perror("Failed to open victim's /proc/$pid/mem file");
+		return NULL;
+	}
+
+	data = malloc(len);
+	if (data == NULL) {
+		perror("Can't allocate memory to read victim's /proc/$pid/mem file");
+		goto out;
+	}
+
+	mem_read = pread(fd, data, len, (off_t)offset);
+	if (mem_read == -1) {
+		perror("Failed to read victim's /proc/$pid/mem file");
+		free(data);
+		data = NULL;
+	}
+
+out:
+	close(fd); /* the descriptor is only needed for this one read */
+	return data;
+}
+
+/* 0: saved_data still matches the victim's memory at stack; 1: it differs; -1: the read failed. */
+static int check_saved_data(struct parasite_ctl *ctl, int pid, void *stack, void *saved_data, size_t saved_data_size)
+{
+	void *current_data;
+	int changed;
+
+	if (saved_data == NULL)
+		return 0; /* nothing was saved, so there is nothing to compare */
+	current_data = read_proc_mem(pid, stack, saved_data_size);
+	if (current_data == NULL)
+		return -1;
+	changed = memcmp(saved_data, current_data, saved_data_size) != 0;
+	free(current_data); /* read_proc_mem() returns a malloc()ed buffer */
+	return changed;
+}
+
+/* Stop pid, run both parasite commands in it, then cure and resume it; 0 on success, -1 on error. */
+static int do_infection(int pid)
+{
+	int state;
+	struct parasite_ctl *ctl;
+	struct infect_ctx *ictx;
+	int *arg;
+	void *stack;
+	size_t saved_data_size = PARASITE_STACK_REDZONE;
+	int saved_data_check;
+
+	compel_log_init(print_vmsg, COMPEL_LOG_DEBUG);
+
+	printf("Stopping task\n");
+	state = compel_stop_task(pid);
+	if (state < 0)
+		err_and_ret("Can't stop task\n");
+
+	printf("Preparing parasite ctl\n");
+	ctl = compel_prepare(pid);
+	if (!ctl)
+		err_and_ret("Can't prepare for infection\n");
+
+	printf("Configuring contexts\n");
+
+	/*
+	 * First -- the infection context. Most of it is already filled
+	 * in by compel_prepare(); just set the log descriptor for the
+	 * parasite side, the library cannot live without it.
+	 */
+	ictx = compel_infect_ctx(ctl);
+	ictx->log_fd = STDERR_FILENO;
+
+	parasite_setup_c_header(ctl);
+
+	printf("Infecting\n");
+	if (compel_infect_no_daemon(ctl, 1, sizeof(int)))
+		err_and_ret("Can't infect victim\n");
+
+	if (atexit(cleanup_saved_data))
+		err_and_ret("Can't register cleanup function with atexit\n");
+
+	stack = get_parasite_rstack_start(ctl);
+
+	if (compel_start_daemon(ctl))
+		err_and_ret("Can't start daemon in victim\n");
+
+	/*
+	 * Now get the area with arguments and run two
+	 * commands one by one.
+	 */
+	arg = compel_parasite_args(ctl, int);
+
+	printf("Running cmd 1\n");
+	*arg = 137; /* main() later expects 138 on the victim's stdout */
+	if (compel_rpc_call_sync(PARASITE_CMD_INC, ctl))
+		err_and_ret("Can't run parasite command 1\n");
+
+	printf("Running cmd 2\n");
+	*arg = 404; /* main() later expects 403 on the victim's stdout */
+	if (compel_rpc_call_sync(PARASITE_CMD_DEC, ctl))
+		err_and_ret("Can't run parasite command 2\n");
+	/* NOTE(review): no visible code assigns saved_data, so this check currently compares nothing. */
+	saved_data_check = check_saved_data(ctl, pid, stack, saved_data, saved_data_size);
+	if (saved_data_check == -1)
+		err_and_ret("Could not check saved data\n");
+	if (saved_data_check != 0)
+		err_and_ret("Saved data unexpectedly modified\n");
+
+	/*
+	 * Done. Cure and resume the task. Earlier error returns skip this step.
+	 */
+	printf("Curing\n");
+	if (compel_cure(ctl))
+		err_and_ret("Can't cure victim\n");
+
+	if (compel_resume_task(pid, state, state))
+		err_and_ret("Can't unseize task\n");
+
+	printf("Done\n");
+
+	return 0;
+}
+
+/* Read one int from fd and print it next to val; non-zero on a short read or a mismatch. */
+static inline int chk(int fd, int val)
+{
+	int got = 0;
+
+	if (read(fd, &got, sizeof(got)) != sizeof(got))
+		return 1;
+	printf("%d, want %d\n", got, val);
+	return got == val ? 0 : 1;
+}
+
+/* Fork ./victim, check that it echoes, infect it, then verify the parasite and victim output. */
+int main(int argc, char **argv)
+{
+	int p_in[2], p_out[2], p_err[2], pid, i, err = 0;
+
+	/*
+	 * Prepare IO-s and fork the victim binary
+	 */
+	if (pipe(p_in) || pipe(p_out) || pipe(p_err)) {
+		perror("Can't make pipe");
+		return -1;
+	}
+
+	pid = vfork();
+	if (pid == 0) {
+		close(p_in[1]);
+		dup2(p_in[0], 0);
+		close(p_in[0]);
+		close(p_out[0]);
+		dup2(p_out[1], 1);
+		close(p_out[1]);
+		close(p_err[0]);
+		dup2(p_err[1], 2);
+		close(p_err[1]);
+		execl("./victim", "victim", NULL);
+		_exit(1); /* exec failed; a vfork() child must use _exit(), not exit() */
+	}
+
+	close(p_in[0]);
+	close(p_out[1]);
+	close(p_err[1]);
+
+	/*
+	 * Tell the little guy some numbers
+	 */
+	i = 1;
+	if (write(p_in[1], &i, sizeof(i)) != sizeof(i))
+		return 1;
+	i = 42;
+	if (write(p_in[1], &i, sizeof(i)) != sizeof(i))
+		return 1;
+
+	printf("Checking the victim alive\n");
+	err = chk(p_out[0], 1);
+	if (err)
+		return 1;
+	err = chk(p_out[0], 42);
+	if (err)
+		return 1;
+
+	/*
+	 * Now do the infection with parasite.c
+	 */
+	printf("Infecting the victim\n");
+	if (do_infection(pid))
+		return 1;
+
+	/*
+	 * Tell the victim some more stuff to check it's alive
+	 */
+	i = 1234;
+	if (write(p_in[1], &i, sizeof(i)) != sizeof(i))
+		return 1;
+	i = 4096;
+	if (write(p_in[1], &i, sizeof(i)) != sizeof(i))
+		return 1;
+
+	/*
+	 * Stop the victim and check the infection went well
+	 */
+	printf("Closing victim stdin\n");
+	close(p_in[1]);
+	printf("Waiting for victim to die\n");
+	wait(NULL);
+
+	printf("Checking the result\n");
+
+	/* These two came from parasite */
+	err = chk(p_out[0], 138);
+	err |= chk(p_out[0], 403);
+
+	/* These two came from post-infect */
+	err |= chk(p_out[0], 1234);
+	err |= chk(p_out[0], 4096);
+
+	if (!err)
+		printf("All OK\n");
+	else
+		printf("Something went WRONG\n");
+
+	return err; /* 0 only when all four values matched */
+}
diff --git a/compel/test/stack/victim.c b/compel/test/stack/victim.c
new file mode 100644
index 000000000..f94613fa1
--- /dev/null
+++ b/compel/test/stack/victim.c
@@ -0,0 +1,16 @@
+#include <unistd.h>
+
+/*
+ * Echo each int read from fd 0 to fd 1 until EOF, an error or a short transfer.
+ */
+int main(int argc, char **argv)
+{
+	int value;
+
+	while (read(0, &value, sizeof(value)) == sizeof(value)) {
+		if (write(1, &value, sizeof(value)) != sizeof(value))
+			break;
+	}
+
+	return 0;
+}
diff --git a/scripts/ci/apt-install b/contrib/apt-install
similarity index 80%
rename from scripts/ci/apt-install
rename to contrib/apt-install
index 5a790901a..676e0f794 100755
--- a/scripts/ci/apt-install
+++ b/contrib/apt-install
@@ -15,8 +15,7 @@ while true; do
 	if [ "${install_retry_counter}" -gt "${max_apt_retries}" ]; then
 		exit 1
 	fi
-	# shellcheck disable=SC2068
-	apt-get clean -qqy && apt-get update -qqy && apt-get install -qqy --no-install-recommends $@ && break
+	apt-get update -y && apt-get install -y --no-install-recommends "$@" && break
 
 	# In case it is a network error let's wait a bit.
 	echo "Retrying attempt ${install_retry_counter}"
diff --git a/contrib/debian/dev-packages.lst b/contrib/debian/dev-packages.lst
deleted file mode 100644
index c2d1509fa..000000000
--- a/contrib/debian/dev-packages.lst
+++ /dev/null
@@ -1,20 +0,0 @@
-# Required packages for development in Debian
-build-essential
-libprotobuf-dev
-libprotobuf-c-dev
-protobuf-c-compiler
-protobuf-compiler
-python3-protobuf
-libnet-dev
-
-# Extra packages, required for testing and building other tools
-pkg-config
-libnl-3-dev
-libbsd0
-libbsd-dev
-iproute2
-libcap-dev
-libaio-dev
-python3-yaml
-libnl-route-3-dev
-python-future
diff --git a/contrib/dependencies/apk-packages.sh b/contrib/dependencies/apk-packages.sh
new file mode 100755
index 000000000..c47fb9fe0
--- /dev/null
+++ b/contrib/dependencies/apk-packages.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env sh
+
+apk add --no-cache \
+	asciidoctor \
+	bash \
+	build-base \
+	coreutils \
+	e2fsprogs \
+	elfutils-dev \
+	git \
+	gnutls-dev \
+	go \
+	ip6tables \
+	iproute2 \
+	iptables \
+	iptables-legacy \
+	libaio-dev \
+	libbsd-dev \
+	libcap-dev \
+	libcap-utils \
+	libdrm-dev \
+	libnet-dev \
+	libnl3-dev \
+	libtraceevent-dev \
+	libtracefs-dev \
+	nftables \
+	nftables-dev \
+	perl \
+	pkgconfig \
+	procps \
+	protobuf-c-compiler \
+	protobuf-c-dev \
+	protobuf-dev \
+	py3-importlib-metadata \
+	py3-pip \
+	py3-protobuf \
+	py3-yaml \
+	python3 \
+	sudo \
+	tar \
+	util-linux \
+	util-linux-dev
diff --git a/contrib/dependencies/apt-cross-packages.sh b/contrib/dependencies/apt-cross-packages.sh
new file mode 100755
index 000000000..30ce6874c
--- /dev/null
+++ b/contrib/dependencies/apt-cross-packages.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env sh
+# Install the cross-build packages for ${DEBIAN_ARCH} via the apt-install helper one directory up.
+APT_INSTALL="$(cd "$(dirname "$0")/.." >/dev/null 2>&1 && pwd)/apt-install"
+if [ ! -x "$APT_INSTALL" ]; then
+	echo "Error: apt-install not found or not executable" >&2
+	exit 1
+fi
+: "${DEBIAN_ARCH:?DEBIAN_ARCH must be set to the target Debian architecture}"
+"$APT_INSTALL" \
+	crossbuild-essential-"${DEBIAN_ARCH}" \
+	iproute2:"${DEBIAN_ARCH}" \
+	libaio-dev:"${DEBIAN_ARCH}" \
+	libbz2-dev:"${DEBIAN_ARCH}" \
+	libc6-"${DEBIAN_ARCH}"-cross \
+	libc6-dev-"${DEBIAN_ARCH}"-cross \
+	libcap-dev:"${DEBIAN_ARCH}" \
+	libdrm-dev:"${DEBIAN_ARCH}" \
+	libelf-dev:"${DEBIAN_ARCH}" \
+	libexpat1-dev:"${DEBIAN_ARCH}" \
+	libgnutls28-dev:"${DEBIAN_ARCH}" \
+	libnet-dev:"${DEBIAN_ARCH}" \
+	libnftables-dev:"${DEBIAN_ARCH}" \
+	libnl-3-dev:"${DEBIAN_ARCH}" \
+	libnl-route-3-dev:"${DEBIAN_ARCH}" \
+	libprotobuf-c-dev:"${DEBIAN_ARCH}" \
+	libprotobuf-dev:"${DEBIAN_ARCH}" \
+	libssl-dev:"${DEBIAN_ARCH}" \
+	libtraceevent-dev:"${DEBIAN_ARCH}" \
+	libtracefs-dev:"${DEBIAN_ARCH}" \
+	ncurses-dev:"${DEBIAN_ARCH}" \
+	uuid-dev:"${DEBIAN_ARCH}" \
+	build-essential \
+	pkg-config \
+	git \
+	protobuf-c-compiler \
+	protobuf-compiler \
+	python3-protobuf
diff --git a/contrib/dependencies/apt-packages.sh b/contrib/dependencies/apt-packages.sh
new file mode 100755
index 000000000..7963be7b4
--- /dev/null
+++ b/contrib/dependencies/apt-packages.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env sh
+# Install the packages listed below via the apt-install helper one directory up.
+APT_INSTALL="$(cd "$(dirname "$0")/.." >/dev/null 2>&1 && pwd)/apt-install"
+if [ ! -x "$APT_INSTALL" ]; then
+	echo "Error: apt-install not found or not executable" >&2
+	exit 1
+fi
+
+"$APT_INSTALL" \
+	asciidoctor \
+	bash \
+	bsdmainutils \
+	build-essential \
+	gdb \
+	git-core \
+	iproute2 \
+	iptables \
+	kmod \
+	libaio-dev \
+	libbsd-dev \
+	libcap-dev \
+	libdrm-dev \
+	libelf-dev \
+	libgnutls28-dev \
+	libgnutls30 \
+	libnet-dev \
+	libnl-3-dev \
+	libnl-route-3-dev \
+	libperl-dev \
+	libprotobuf-c-dev \
+	libprotobuf-dev \
+	libselinux-dev \
+	libtraceevent-dev \
+	libtracefs-dev \
+	pkg-config \
+	protobuf-c-compiler \
+	protobuf-compiler \
+	python3-importlib-metadata \
+	python3-pip \
+	python3-protobuf \
+	python3-yaml \
+	time \
+	util-linux \
+	uuid-dev
diff --git a/contrib/dependencies/dnf-packages.sh b/contrib/dependencies/dnf-packages.sh
new file mode 100755
index 000000000..793f267a5
--- /dev/null
+++ b/contrib/dependencies/dnf-packages.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env sh
+# Install the packages listed below with dnf, answering yes to all prompts.
+dnf install -y \
+	asciidoc \
+	binutils \
+	elfutils-libelf-devel \
+	gcc \
+	git \
+	glibc-devel \
+	gnutls-devel \
+	iproute \
+	iptables \
+	libaio-devel \
+	libasan \
+	libbpf-devel \
+	libbsd-devel \
+	libcap-devel \
+	libdrm-devel \
+	libnet-devel \
+	libnl3-devel \
+	libselinux-devel \
+	libtraceevent-devel \
+	libtracefs-devel \
+	libuuid-devel \
+	make \
+	nftables \
+	pkg-config \
+	protobuf \
+	protobuf-c \
+	protobuf-c-devel \
+	protobuf-compiler \
+	protobuf-devel \
+	python-devel \
+	python3-importlib-metadata \
+	python3-protobuf \
+	python3-pyyaml \
+	python3-setuptools \
+	python3-wheel \
+	rubygem-asciidoctor \
+	xmlto
diff --git a/contrib/dependencies/pacman-packages.sh b/contrib/dependencies/pacman-packages.sh
new file mode 100755
index 000000000..260797606
--- /dev/null
+++ b/contrib/dependencies/pacman-packages.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env sh
+
+pacman -Syu --noconfirm \
+	asciidoctor \
+	base-devel \
+	bash \
+	coreutils \
+	diffutils \
+	git \
+	gnutls \
+	go \
+	iproute2 \
+	iptables \
+	libaio \
+	libbsd \
+	libcap \
+	libdrm \
+	libelf \
+	libnet \
+	libnl \
+	libtraceevent \
+	libtracefs \
+	nftables \
+	pkg-config \
+	protobuf \
+	protobuf-c \
+	python-importlib-metadata \
+	python-pip \
+	python-protobuf \
+	python-yaml \
+	sudo \
+	tar \
+	util-linux \
+	util-linux-libs
diff --git a/contrib/docker_cr.sh b/contrib/docker_cr.sh
index 9b43d8ba1..04ef676cd 100755
--- a/contrib/docker_cr.sh
+++ b/contrib/docker_cr.sh
@@ -418,7 +418,7 @@ resolve_path() {
 	local p
 
 	p="${2}"
-	if which realpath > /dev/null; then
+	if command -v realpath > /dev/null; then
 		p=$(realpath "${p}")
 	fi
 	${ECHO} "${1}: ${p}"
@@ -427,7 +427,7 @@ resolve_path() {
 resolve_cmd() {
 	local cpath
 
-	cpath=$(which "${2}")
+	cpath=$(command -v "${2}")
 	resolve_path "${1}" "${cpath}"
 }
 
diff --git a/coredump/coredump.py b/coredump/coredump
old mode 100644
new mode 100755
similarity index 74%
rename from coredump/coredump.py
rename to coredump/coredump
index 5e63d2138..5b3e6f366
--- a/coredump/coredump.py
+++ b/coredump/coredump
@@ -1,8 +1,13 @@
+#!/usr/bin/env python3
+import platform
 import argparse
 import os
+import sys
 
 import criu_coredump
 
+PLATFORMS = ["aarch64", "armv7l", "x86_64"]
+
 
 def coredump(opts):
     generator = criu_coredump.coredump_generator()
@@ -34,7 +39,16 @@ def main():
 
     opts = vars(parser.parse_args())
 
-    coredump(opts)
+    if platform.machine() not in PLATFORMS:
+        print("ERROR: %s is only supported on: %s" % (sys.argv[0], ', '.join(PLATFORMS)))
+        sys.exit(1)
+
+    try:
+        coredump(opts)
+    except SystemExit as error:
+        print('ERROR: %s' % error)
+        print('Exiting')
+        sys.exit(1)
 
 
 if __name__ == '__main__':
diff --git a/coredump/coredump-python2 b/coredump/coredump-python2
deleted file mode 100755
index 564c05ce9..000000000
--- a/coredump/coredump-python2
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env python2
-
-import coredump
-
-if __name__ == '__main__':
-    coredump.main()
diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py
index 53b143ec0..acb806ace 100644
--- a/coredump/criu_coredump/coredump.py
+++ b/coredump/criu_coredump/coredump.py
@@ -31,16 +31,11 @@
 import io
 import sys
 import ctypes
+import platform
 
 from pycriu import images
 from . import elf
 
-
-try:
-    from itertools import ifilter as filter
-except ImportError:
-    pass
-
 # Some memory-related constants
 PAGESIZE = 4096
 status = {
@@ -59,6 +54,8 @@ status = {
     "VMA_AREA_SOCKET": 1 << 11,
     "VMA_AREA_VVAR": 1 << 12,
     "VMA_AREA_AIORING": 1 << 13,
+    "VMA_AREA_MEMFD": 1 << 14,
+    "VMA_AREA_UPROBES": 1 << 17,
     "VMA_AREA_UNSUPP": 1 << 31
 }
 
@@ -99,8 +96,13 @@ class coredump:
             buf.write(b"\0" * (8 - len(note.owner)))
             buf.write(note.data)
 
-        offset = ctypes.sizeof(elf.Elf64_Ehdr())
-        offset += (len(self.vmas) + 1) * ctypes.sizeof(elf.Elf64_Phdr())
+        bits = platform.architecture()[0]  # 32 or 64 bits
+
+        ehdr = {"32bit": elf.Elf32_Ehdr, "64bit": elf.Elf64_Ehdr}
+        phdr = {"32bit": elf.Elf32_Phdr, "64bit": elf.Elf64_Phdr}
+
+        offset = ctypes.sizeof(ehdr[bits]())
+        offset += (len(self.vmas) + 1) * ctypes.sizeof(phdr[bits]())
 
         filesz = 0
         for note in self.notes:
@@ -135,6 +137,20 @@ class coredump_generator:
     reg_files = None  # reg-files;
     pagemaps = {}  # pagemap by pid;
 
+    # thread info key based on the current arch
+    thread_info_key = {
+        "aarch64": "ti_aarch64",
+        "armv7l": "ti_arm",
+        "x86_64": "thread_info",
+    }
+
+    machine = platform.machine()  # current arch
+    bits = platform.architecture()[0]  # 32 or 64 bits
+
+    ehdr = {"32bit": elf.Elf32_Ehdr, "64bit": elf.Elf64_Ehdr}  # 32 or 64 bits Ehdr
+    nhdr = {"32bit": elf.Elf32_Nhdr, "64bit": elf.Elf64_Nhdr}  # 32 or 64 bits Nhdr
+    phdr = {"32bit": elf.Elf32_Phdr, "64bit": elf.Elf64_Phdr}  # 32 or 64 bits Phdr
+
     def _img_open_and_strip(self, name, single=False, pid=None):
         """
         Load criu image and strip it from magic and redundant list.
@@ -206,44 +222,62 @@ class coredump_generator:
         """
         Generate elf header for process pid with program headers phdrs.
         """
-        ehdr = elf.Elf64_Ehdr()
+        ei_class = {"32bit": elf.ELFCLASS32, "64bit": elf.ELFCLASS64}
+
+        ehdr = self.ehdr[self.bits]()
 
         ctypes.memset(ctypes.addressof(ehdr), 0, ctypes.sizeof(ehdr))
         ehdr.e_ident[elf.EI_MAG0] = elf.ELFMAG0
         ehdr.e_ident[elf.EI_MAG1] = elf.ELFMAG1
         ehdr.e_ident[elf.EI_MAG2] = elf.ELFMAG2
         ehdr.e_ident[elf.EI_MAG3] = elf.ELFMAG3
-        ehdr.e_ident[elf.EI_CLASS] = elf.ELFCLASS64
+        ehdr.e_ident[elf.EI_CLASS] = ei_class[self.bits]
         ehdr.e_ident[elf.EI_DATA] = elf.ELFDATA2LSB
         ehdr.e_ident[elf.EI_VERSION] = elf.EV_CURRENT
 
+        if self.machine == "armv7l":
+            ehdr.e_ident[elf.EI_OSABI] = elf.ELFOSABI_ARM
+        else:
+            ehdr.e_ident[elf.EI_OSABI] = elf.ELFOSABI_NONE
+
         ehdr.e_type = elf.ET_CORE
-        ehdr.e_machine = elf.EM_X86_64
+        ehdr.e_machine = self._get_e_machine()
         ehdr.e_version = elf.EV_CURRENT
-        ehdr.e_phoff = ctypes.sizeof(elf.Elf64_Ehdr())
-        ehdr.e_ehsize = ctypes.sizeof(elf.Elf64_Ehdr())
-        ehdr.e_phentsize = ctypes.sizeof(elf.Elf64_Phdr())
+        ehdr.e_phoff = ctypes.sizeof(self.ehdr[self.bits]())
+        ehdr.e_ehsize = ctypes.sizeof(self.ehdr[self.bits]())
+        ehdr.e_phentsize = ctypes.sizeof(self.phdr[self.bits]())
         # FIXME Case len(phdrs) > PN_XNUM should be handled properly.
         # See fs/binfmt_elf.c from linux kernel.
         ehdr.e_phnum = len(phdrs)
 
         return ehdr
 
+    def _get_e_machine(self):
+        """
+        Get the e_machine field based on the current architecture.
+        """
+        e_machine_dict = {
+            "aarch64": elf.EM_AARCH64,
+            "armv7l": elf.EM_ARM,
+            "x86_64": elf.EM_X86_64,
+        }
+        return e_machine_dict[self.machine]
+
     def _gen_phdrs(self, pid, notes, vmas):
         """
         Generate program headers for process pid.
         """
         phdrs = []
 
-        offset = ctypes.sizeof(elf.Elf64_Ehdr())
-        offset += (len(vmas) + 1) * ctypes.sizeof(elf.Elf64_Phdr())
+        offset = ctypes.sizeof(self.ehdr[self.bits]())
+        offset += (len(vmas) + 1) * ctypes.sizeof(self.phdr[self.bits]())
 
         filesz = 0
         for note in notes:
             filesz += ctypes.sizeof(note.nhdr) + ctypes.sizeof(note.data) + 8
 
         # PT_NOTE
-        phdr = elf.Elf64_Phdr()
+        phdr = self.phdr[self.bits]()
         ctypes.memset(ctypes.addressof(phdr), 0, ctypes.sizeof(phdr))
         phdr.p_type = elf.PT_NOTE
         phdr.p_offset = offset
@@ -263,7 +297,7 @@ class coredump_generator:
         for vma in vmas:
             offset += filesz
             filesz = vma.filesz
-            phdr = elf.Elf64_Phdr()
+            phdr = self.phdr[self.bits]()
             ctypes.memset(ctypes.addressof(phdr), 0, ctypes.sizeof(phdr))
             phdr.p_type = elf.PT_LOAD
             phdr.p_align = PAGESIZE
@@ -315,13 +349,12 @@ class coredump_generator:
         prpsinfo.pr_ppid = pstree["ppid"]
         prpsinfo.pr_pgrp = pstree["pgid"]
         prpsinfo.pr_sid = pstree["sid"]
-        prpsinfo.pr_psargs = self._gen_cmdline(pid)
-        if (sys.version_info > (3, 0)):
-            prpsinfo.pr_fname = core["tc"]["comm"].encode()
-        else:
-            prpsinfo.pr_fname = core["tc"]["comm"]
+        # prpsinfo.pr_psargs has a limit of 80 characters which means it will
+        # fail here if the cmdline is longer than 80
+        prpsinfo.pr_psargs = self._gen_cmdline(pid)[:80]
+        prpsinfo.pr_fname = core["tc"]["comm"].encode()
 
-        nhdr = elf.Elf64_Nhdr()
+        nhdr = self.nhdr[self.bits]()
         nhdr.n_namesz = 5
         nhdr.n_descsz = ctypes.sizeof(elf.elf_prpsinfo())
         nhdr.n_type = elf.NT_PRPSINFO
@@ -338,7 +371,7 @@ class coredump_generator:
         Generate NT_PRSTATUS note for thread tid of process pid.
         """
         core = self.cores[tid]
-        regs = core["thread_info"]["gpregs"]
+        regs = self._get_gpregs(core)
         pstree = self.pstree[pid]
 
         prstatus = elf.elf_prstatus()
@@ -351,35 +384,9 @@ class coredump_generator:
         prstatus.pr_pgrp = pstree["pgid"]
         prstatus.pr_sid = pstree["sid"]
 
-        prstatus.pr_reg.r15 = regs["r15"]
-        prstatus.pr_reg.r14 = regs["r14"]
-        prstatus.pr_reg.r13 = regs["r13"]
-        prstatus.pr_reg.r12 = regs["r12"]
-        prstatus.pr_reg.rbp = regs["bp"]
-        prstatus.pr_reg.rbx = regs["bx"]
-        prstatus.pr_reg.r11 = regs["r11"]
-        prstatus.pr_reg.r10 = regs["r10"]
-        prstatus.pr_reg.r9 = regs["r9"]
-        prstatus.pr_reg.r8 = regs["r8"]
-        prstatus.pr_reg.rax = regs["ax"]
-        prstatus.pr_reg.rcx = regs["cx"]
-        prstatus.pr_reg.rdx = regs["dx"]
-        prstatus.pr_reg.rsi = regs["si"]
-        prstatus.pr_reg.rdi = regs["di"]
-        prstatus.pr_reg.orig_rax = regs["orig_ax"]
-        prstatus.pr_reg.rip = regs["ip"]
-        prstatus.pr_reg.cs = regs["cs"]
-        prstatus.pr_reg.eflags = regs["flags"]
-        prstatus.pr_reg.rsp = regs["sp"]
-        prstatus.pr_reg.ss = regs["ss"]
-        prstatus.pr_reg.fs_base = regs["fs_base"]
-        prstatus.pr_reg.gs_base = regs["gs_base"]
-        prstatus.pr_reg.ds = regs["ds"]
-        prstatus.pr_reg.es = regs["es"]
-        prstatus.pr_reg.fs = regs["fs"]
-        prstatus.pr_reg.gs = regs["gs"]
+        self._set_pr_regset(prstatus.pr_reg, regs)
 
-        nhdr = elf.Elf64_Nhdr()
+        nhdr = self.nhdr[self.bits]()
         nhdr.n_namesz = 5
         nhdr.n_descsz = ctypes.sizeof(elf.elf_prstatus())
         nhdr.n_type = elf.NT_PRSTATUS
@@ -391,28 +398,83 @@ class coredump_generator:
 
         return note
 
+    def _get_gpregs(self, core):
+        """
+        Get the general purpose registers based on the current architecture.
+        """
+        thread_info_key = self.thread_info_key[self.machine]
+        thread_info = core[thread_info_key]
+
+        return thread_info["gpregs"]
+
+    def _set_pr_regset(self, pr_reg, regs):
+        """
+        Set the pr_reg struct based on the current architecture.
+        """
+        if self.machine == "aarch64":
+            pr_reg.regs = (ctypes.c_ulonglong * len(regs["regs"]))(*regs["regs"])
+            pr_reg.sp = regs["sp"]
+            pr_reg.pc = regs["pc"]
+            pr_reg.pstate = regs["pstate"]
+        elif self.machine == "armv7l":
+            pr_reg.r0 = regs["r0"]
+            pr_reg.r1 = regs["r1"]
+            pr_reg.r2 = regs["r2"]
+            pr_reg.r3 = regs["r3"]
+            pr_reg.r4 = regs["r4"]
+            pr_reg.r5 = regs["r5"]
+            pr_reg.r6 = regs["r6"]
+            pr_reg.r7 = regs["r7"]
+            pr_reg.r8 = regs["r8"]
+            pr_reg.r9 = regs["r9"]
+            pr_reg.r10 = regs["r10"]
+            pr_reg.fp = regs["fp"]
+            pr_reg.ip = regs["ip"]
+            pr_reg.sp = regs["sp"]
+            pr_reg.lr = regs["lr"]
+            pr_reg.pc = regs["pc"]
+            pr_reg.cpsr = regs["cpsr"]
+            pr_reg.orig_r0 = regs["orig_r0"]
+        elif self.machine == "x86_64":
+            pr_reg.r15 = regs["r15"]
+            pr_reg.r14 = regs["r14"]
+            pr_reg.r13 = regs["r13"]
+            pr_reg.r12 = regs["r12"]
+            pr_reg.rbp = regs["bp"]
+            pr_reg.rbx = regs["bx"]
+            pr_reg.r11 = regs["r11"]
+            pr_reg.r10 = regs["r10"]
+            pr_reg.r9 = regs["r9"]
+            pr_reg.r8 = regs["r8"]
+            pr_reg.rax = regs["ax"]
+            pr_reg.rcx = regs["cx"]
+            pr_reg.rdx = regs["dx"]
+            pr_reg.rsi = regs["si"]
+            pr_reg.rdi = regs["di"]
+            pr_reg.orig_rax = regs["orig_ax"]
+            pr_reg.rip = regs["ip"]
+            pr_reg.cs = regs["cs"]
+            pr_reg.eflags = regs["flags"]
+            pr_reg.rsp = regs["sp"]
+            pr_reg.ss = regs["ss"]
+            pr_reg.fs_base = regs["fs_base"]
+            pr_reg.gs_base = regs["gs_base"]
+            pr_reg.ds = regs["ds"]
+            pr_reg.es = regs["es"]
+            pr_reg.fs = regs["fs"]
+            pr_reg.gs = regs["gs"]
+
     def _gen_fpregset(self, pid, tid):
         """
         Generate NT_FPREGSET note for thread tid of process pid.
         """
         core = self.cores[tid]
-        regs = core["thread_info"]["fpregs"]
+        regs = self._get_fpregs(core)
 
         fpregset = elf.elf_fpregset_t()
         ctypes.memset(ctypes.addressof(fpregset), 0, ctypes.sizeof(fpregset))
 
-        fpregset.cwd = regs["cwd"]
-        fpregset.swd = regs["swd"]
-        fpregset.ftw = regs["twd"]
-        fpregset.fop = regs["fop"]
-        fpregset.rip = regs["rip"]
-        fpregset.rdp = regs["rdp"]
-        fpregset.mxcsr = regs["mxcsr"]
-        fpregset.mxcr_mask = regs["mxcsr_mask"]
-        fpregset.st_space = (ctypes.c_uint * len(regs["st_space"]))(
-            *regs["st_space"])
-        fpregset.xmm_space = (ctypes.c_uint * len(regs["xmm_space"]))(
-            *regs["xmm_space"])
+        self._set_fpregset(fpregset, regs)
 
         nhdr = elf.Elf64_Nhdr()
         nhdr.n_namesz = 5
@@ -426,6 +488,86 @@ class coredump_generator:
 
         return note
 
+    def _get_fpregs(self, core):
+        """
+        Get the floating point register dictionary based on the current architecture.
+        """
+        fpregs_key_dict = {"aarch64": "fpsimd", "x86_64": "fpregs"}
+        fpregs_key = fpregs_key_dict[self.machine]
+
+        thread_info_key = self.thread_info_key[self.machine]
+
+        return core[thread_info_key][fpregs_key]
+
+    def _set_fpregset(self, fpregset, regs):
+        """
+        Set the fpregset struct based on the current architecture.
+        """
+        if self.machine == "aarch64":
+            fpregset.vregs = (ctypes.c_ulonglong * len(regs["vregs"]))(*regs["vregs"])
+            fpregset.fpsr = regs["fpsr"]
+            fpregset.fpcr = regs["fpcr"]
+        elif self.machine == "x86_64":
+            fpregset.cwd = regs["cwd"]
+            fpregset.swd = regs["swd"]
+            fpregset.ftw = regs["twd"]
+            fpregset.fop = regs["fop"]
+            fpregset.rip = regs["rip"]
+            fpregset.rdp = regs["rdp"]
+            fpregset.mxcsr = regs["mxcsr"]
+            fpregset.mxcr_mask = regs["mxcsr_mask"]
+            fpregset.st_space = (ctypes.c_uint * len(regs["st_space"]))(
+                *regs["st_space"])
+            fpregset.xmm_space = (ctypes.c_uint * len(regs["xmm_space"]))(
+                *regs["xmm_space"])
+
+    def _gen_arm_tls(self, tid):
+        """
+        Generate NT_ARM_TLS note for thread tid of process pid.
+        """
+        core = self.cores[tid]
+        tls = ctypes.c_ulonglong(core["ti_aarch64"]["tls"])
+
+        nhdr = elf.Elf64_Nhdr()
+        nhdr.n_namesz = 6
+        nhdr.n_descsz = ctypes.sizeof(ctypes.c_ulonglong)
+        nhdr.n_type = elf.NT_ARM_TLS
+
+        note = elf_note()
+        note.data = tls
+        note.owner = b"LINUX"
+        note.nhdr = nhdr
+
+        return note
+
+    def _gen_arm_vfp(self, tid):
+        """
+        Generate NT_ARM_VFP note for thread tid of process pid.
+        """
+        core = self.cores[tid]
+        fpstate = core["ti_arm"]["fpstate"]
+
+        data = elf.vfp_hard_struct()
+        ctypes.memset(ctypes.addressof(data), 0, ctypes.sizeof(data))
+
+        data.vfp_regs = (ctypes.c_uint64 * len(fpstate["vfp_regs"]))(*fpstate["vfp_regs"])
+        data.fpexc = fpstate["fpexc"]
+        data.fpscr = fpstate["fpscr"]
+        data.fpinst = fpstate["fpinst"]
+        data.fpinst2 = fpstate["fpinst2"]
+
+        nhdr = elf.Elf32_Nhdr()
+        nhdr.n_namesz = 6
+        nhdr.n_descsz = ctypes.sizeof(data)
+        nhdr.n_type = elf.NT_ARM_VFP
+
+        note = elf_note()
+        note.data = data
+        note.owner = b"LINUX"
+        note.nhdr = nhdr
+
+        return note
+
     def _gen_x86_xstate(self, pid, tid):
         """
         Generate NT_X86_XSTATE note for thread tid of process pid.
@@ -475,7 +617,7 @@ class coredump_generator:
         # FIXME zeroify everything for now
         ctypes.memset(ctypes.addressof(siginfo), 0, ctypes.sizeof(siginfo))
 
-        nhdr = elf.Elf64_Nhdr()
+        nhdr = self.nhdr[self.bits]()
         nhdr.n_namesz = 5
         nhdr.n_descsz = ctypes.sizeof(elf.siginfo_t())
         nhdr.n_type = elf.NT_SIGINFO
@@ -494,17 +636,22 @@ class coredump_generator:
         mm = self.mms[pid]
         num_auxv = len(mm["mm_saved_auxv"]) // 2
 
-        class elf_auxv(ctypes.Structure):
+        class elf32_auxv(ctypes.Structure):
+            _fields_ = [("auxv", elf.Elf32_auxv_t * num_auxv)]
+
+        class elf64_auxv(ctypes.Structure):
             _fields_ = [("auxv", elf.Elf64_auxv_t * num_auxv)]
 
-        auxv = elf_auxv()
+        elf_auxv = {"32bit": elf32_auxv(), "64bit": elf64_auxv()}
+
+        auxv = elf_auxv[self.bits]
         for i in range(num_auxv):
             auxv.auxv[i].a_type = mm["mm_saved_auxv"][i]
             auxv.auxv[i].a_val = mm["mm_saved_auxv"][i + 1]
 
-        nhdr = elf.Elf64_Nhdr()
+        nhdr = self.nhdr[self.bits]()
         nhdr.n_namesz = 5
-        nhdr.n_descsz = ctypes.sizeof(elf_auxv())
+        nhdr.n_descsz = ctypes.sizeof(elf_auxv[self.bits])
         nhdr.n_type = elf.NT_AUXV
 
         note = elf_note()
@@ -579,12 +726,9 @@ class coredump_generator:
             setattr(data, "start" + str(i), info.start)
             setattr(data, "end" + str(i), info.end)
             setattr(data, "file_ofs" + str(i), info.file_ofs)
-            if (sys.version_info > (3, 0)):
-                setattr(data, "name" + str(i), info.name.encode())
-            else:
-                setattr(data, "name" + str(i), info.name)
+            setattr(data, "name" + str(i), info.name.encode())
 
-        nhdr = elf.Elf64_Nhdr()
+        nhdr = self.nhdr[self.bits]()
 
         nhdr.n_namesz = 5  # strlen + 1
         nhdr.n_descsz = ctypes.sizeof(elf_files())
@@ -601,9 +745,15 @@ class coredump_generator:
         notes = []
 
         notes.append(self._gen_prstatus(pid, tid))
-        notes.append(self._gen_fpregset(pid, tid))
-        notes.append(self._gen_x86_xstate(pid, tid))
+        if self.machine != "armv7l":
+            notes.append(self._gen_fpregset(pid, tid))
         notes.append(self._gen_siginfo(pid, tid))
+        if self.machine == "aarch64":
+            notes.append(self._gen_arm_tls(tid))
+        elif self.machine == "armv7l":
+            notes.append(self._gen_arm_vfp(tid))
+        elif self.machine == "x86_64":
+            notes.append(self._gen_x86_xstate(pid, tid))
 
         return notes
 
@@ -644,7 +794,9 @@ class coredump_generator:
         off = 0  # in pages
         for m in pagemap[1:]:
             found = False
-            for i in range(m["nr_pages"]):
+            num_pages = m.get("nr_pages", m["compat_nr_pages"])
+
+            for i in range(num_pages):
                 if m["vaddr"] + i * PAGESIZE == page_no * PAGESIZE:
                     found = True
                     break
@@ -690,7 +842,11 @@ class coredump_generator:
             files = self.reg_files
             fname = next(filter(lambda x: x["id"] == shmid, files))["name"]
 
-            f = open(fname, 'rb')
+            try:
+                f = open(fname, 'rb')
+            except FileNotFoundError:
+                sys.exit('Required file %s not found.' % fname)
+
             f.seek(off)
 
         start = vma["start"]
diff --git a/coredump/criu_coredump/elf.py b/coredump/criu_coredump/elf.py
index 092b47857..2911f491e 100644
--- a/coredump/criu_coredump/elf.py
+++ b/coredump/criu_coredump/elf.py
@@ -1,5 +1,14 @@
 # Define structures and constants for generating elf file.
 import ctypes
+import platform
+
+MACHINE = platform.machine()
+
+Elf32_Half = ctypes.c_uint16  # typedef uint16_t Elf32_Half;
+Elf32_Word = ctypes.c_uint32  # typedef uint32_t Elf32_Word;
+Elf32_Addr = ctypes.c_uint32  # typedef uint32_t Elf32_Addr;
+Elf32_Off = ctypes.c_uint32  # typedef uint32_t Elf32_Off;
+Elf32_Xword = ctypes.c_uint64  # typedef uint64_t Elf32_Xword;
 
 Elf64_Half = ctypes.c_uint16  # typedef uint16_t Elf64_Half;
 Elf64_Word = ctypes.c_uint32  # typedef uint32_t Elf64_Word;
@@ -7,7 +16,7 @@ Elf64_Addr = ctypes.c_uint64  # typedef uint64_t Elf64_Addr;
 Elf64_Off = ctypes.c_uint64  # typedef uint64_t Elf64_Off;
 Elf64_Xword = ctypes.c_uint64  # typedef uint64_t Elf64_Xword;
 
-# Elf64_Ehdr related constants.
+# Elf_Ehdr related constants.
 
 # e_ident size.
 EI_NIDENT = 16  # #define EI_NIDENT (16)
@@ -28,21 +37,50 @@ EI_CLASS = 4  # #define EI_CLASS        4               /* File class byte index
 
 EI_DATA = 5  # #define EI_DATA         5               /* Data encoding byte index */
 
+EI_OSABI = 7  # #define EI_OSABI	7		/* OS ABI identification */
+
 EI_VERSION = 6  # #define EI_VERSION      6               /* File version byte index */
 
 ELFDATA2LSB = 1  # #define ELFDATA2LSB     1               /* 2's complement, little endian */
 
+ELFCLASS32 = 1  # #define ELFCLASS32	1		/* 32-bit objects */
 ELFCLASS64 = 2  # #define ELFCLASS64      2               /* 64-bit objects */
 
 # Legal values for e_type (object file type).
 ET_CORE = 4  # #define ET_CORE         4               /* Core file */
 
 # Legal values for e_machine (architecture).
+EM_ARM = 40  # #define EM_ARM		40	/* ARM */
 EM_X86_64 = 62  # #define EM_X86_64       62              /* AMD x86-64 architecture */
+EM_AARCH64 = 183  # #define EM_AARCH64	183	/* ARM AARCH64 */
 
 # Legal values for e_version (version).
 EV_CURRENT = 1  # #define EV_CURRENT      1               /* Current version */
 
+# Legal values for e_osabi
+ELFOSABI_NONE = 0  # #define ELFOSABI_NONE		0	/* UNIX System V ABI */
+ELFOSABI_ARM = 97  # #define ELFOSABI_ARM		97	/* ARM */
+
+
+class Elf32_Ehdr(ctypes.Structure):  # typedef struct
+    _fields_ = [
+        ("e_ident",
+         ctypes.c_ubyte * EI_NIDENT),  # unsigned char e_ident[EI_NIDENT];
+        ("e_type", Elf32_Half),  # Elf32_Half e_type;
+        ("e_machine", Elf32_Half),  # Elf32_Half e_machine;
+        ("e_version", Elf32_Word),  # Elf32_Word e_version;
+        ("e_entry", Elf32_Addr),  # Elf32_Addr e_entry;
+        ("e_phoff", Elf32_Off),  # Elf32_Off e_phoff;
+        ("e_shoff", Elf32_Off),  # Elf32_Off e_shoff;
+        ("e_flags", Elf32_Word),  # Elf32_Word e_flags;
+        ("e_ehsize", Elf32_Half),  # Elf32_Half e_ehsize;
+        ("e_phentsize", Elf32_Half),  # Elf32_Half e_phentsize;
+        ("e_phnum", Elf32_Half),  # Elf32_Half e_phnum;
+        ("e_shentsize", Elf32_Half),  # Elf32_Half e_shentsize;
+        ("e_shnum", Elf32_Half),  # Elf32_Half e_shnum;
+        ("e_shstrndx", Elf32_Half)  # Elf32_Half e_shstrndx;
+    ]  # } Elf32_Ehdr;
+
 
 class Elf64_Ehdr(ctypes.Structure):  # typedef struct
     _fields_ = [
@@ -64,7 +102,7 @@ class Elf64_Ehdr(ctypes.Structure):  # typedef struct
     ]  # } Elf64_Ehdr;
 
 
-# Elf64_Phdr related constants.
+# Elf_Phdr related constants.
 
 # Legal values for p_type (segment type).
 PT_LOAD = 1  # #define PT_LOAD         1               /* Loadable program segment */
@@ -76,6 +114,19 @@ PF_W = 1 << 1  # #define PF_W            (1 << 1)        /* Segment is writable
 PF_R = 1 << 2  # #define PF_R            (1 << 2)        /* Segment is readable */
 
 
+class Elf32_Phdr(ctypes.Structure):  # typedef struct
+    _fields_ = [
+        ("p_type", Elf32_Word),  # Elf32_Word p_type;
+        ("p_offset", Elf32_Off),  # Elf32_Off p_offset;
+        ("p_vaddr", Elf32_Addr),  # Elf32_Addr p_vaddr;
+        ("p_paddr", Elf32_Addr),  # Elf32_Addr p_paddr;
+        ("p_filesz", Elf32_Word),  # Elf32_Word p_filesz;
+        ("p_memsz", Elf32_Word),  # Elf32_Word p_memsz;
+        ("p_flags", Elf32_Word),  # Elf32_Word p_flags;
+        ("p_align", Elf32_Word),  # Elf32_Word p_align;
+    ]  # } Elf32_Phdr;
+
+
 class Elf64_Phdr(ctypes.Structure):  # typedef struct
     _fields_ = [
         ("p_type", Elf64_Word),  # Elf64_Word p_type;
@@ -89,7 +140,25 @@ class Elf64_Phdr(ctypes.Structure):  # typedef struct
     ]  # } Elf64_Phdr;
 
 
-# Elf64_auxv_t related constants.
+# Elf_auxv_t related constants.
+
+
+class _Elf32_auxv_t_U(ctypes.Union):
+    _fields_ = [("a_val", ctypes.c_uint32)]
+
+
+class Elf32_auxv_t(ctypes.Structure):  # typedef struct
+    _fields_ = [
+        ("a_type",
+         ctypes.c_uint32),   # uint32_t a_type;		/* Entry type */
+        ("a_un", _Elf32_auxv_t_U)  # union
+
+        # uint32_t a_val;		/* Integer value */
+        # /* We use to have pointer elements added here.  We cannot do that,
+        # though, since it does not work when using 32-bit definitions
+        # on 64-bit platforms and vice versa.  */
+        # } a_un;
+    ]  # } Elf32_auxv_t;
 
 
 class _Elf64_auxv_t_U(ctypes.Union):
@@ -110,7 +179,7 @@ class Elf64_auxv_t(ctypes.Structure):  # typedef struct
     ]  # } Elf64_auxv_t;
 
 
-# Elf64_Nhdr related constants.
+# Elf_Nhdr related constants.
 
 NT_PRSTATUS = 1  # #define NT_PRSTATUS  1  /* Contains copy of prstatus struct */
 NT_FPREGSET = 2  # #define NT_FPREGSET  2  /* Contains copy of fpregset struct */
@@ -119,6 +188,22 @@ NT_AUXV = 6  # #define NT_AUXV  6  /* Contains copy of auxv array */
 NT_SIGINFO = 0x53494749  # #define NT_SIGINFO  0x53494749  /* Contains copy of siginfo_t, size might increase */
 NT_FILE = 0x46494c45  # #define NT_FILE  0x46494c45  /* Contains information about mapped files */
 NT_X86_XSTATE = 0x202  # #define NT_X86_XSTATE  0x202  /* x86 extended state using xsave */
+NT_ARM_VFP = 0x400  # #define NT_ARM_VFP	0x400		/* ARM VFP/NEON registers */
+NT_ARM_TLS = 0x401  # #define NT_ARM_TLS	0x401 /* ARM TLS register */
+
+
+class Elf32_Nhdr(ctypes.Structure):  # typedef struct
+    _fields_ = [
+        (
+            "n_namesz", Elf32_Word
+        ),  # Elf32_Word n_namesz;  /* Length of the note's name.  */
+        (
+            "n_descsz", Elf32_Word
+        ),  # Elf32_Word n_descsz;  /* Length of the note's descriptor.  */
+        (
+            "n_type", Elf32_Word
+        ),  # Elf32_Word n_type;  /* Type of the note.  */
+    ]  # } Elf32_Nhdr;
 
 
 class Elf64_Nhdr(ctypes.Structure):  # typedef struct
@@ -134,7 +219,52 @@ class Elf64_Nhdr(ctypes.Structure):  # typedef struct
     ]  # } Elf64_Nhdr;
 
 
-# Elf64_Shdr related constants.
+# Elf_Shdr related constants.
+
+
+class Elf32_Shdr(ctypes.Structure):
+    _fields_ = [
+        (
+            # Section name (string tbl index)
+            "sh_name", Elf32_Word
+        ),
+        (
+            # Section type
+            "sh_type", Elf32_Word
+        ),
+        (
+            # Section flags
+            "sh_flags", Elf32_Word
+        ),
+        (
+            # Section virtual addr at execution
+            "sh_addr", Elf32_Addr
+        ),
+        (
+            # Section file offset
+            "sh_offset", Elf32_Off
+        ),
+        (
+            # Section size in bytes
+            "sh_size", Elf32_Word
+        ),
+        (
+            # Link to another section
+            "sh_link", Elf32_Word
+        ),
+        (
+            # Additional section information
+            "sh_info", Elf32_Word
+        ),
+        (
+            # Section alignment
+            "sh_addralign", Elf32_Word
+        ),
+        (
+            # Entry size if section holds table
+            "sh_entsize", Elf32_Word
+        )
+    ]
 
 
 class Elf64_Shdr(ctypes.Structure):
@@ -218,7 +348,7 @@ class timeval(ctypes.Structure):  # struct timeval
     ]
 
 
-class user_regs_struct(ctypes.Structure):  # struct user_regs_struct
+class x86_64_user_regs_struct(ctypes.Structure):  # struct x86_64_user_regs_struct
     _fields_ = [
         ("r15",
          ctypes.c_ulonglong),  # __extension__ unsigned long long int r15;
@@ -277,10 +407,73 @@ class user_regs_struct(ctypes.Structure):  # struct user_regs_struct
     ]
 
 
+class aarch64_user_regs_struct(ctypes.Structure):  # struct aarch64_user_regs_struct
+    _fields_ = [
+        ("regs",
+         ctypes.c_ulonglong * 31),  # unsigned long long int regs[31];
+        ("sp",
+         ctypes.c_ulonglong),  # unsigned long long int sp;
+        ("pc",
+         ctypes.c_ulonglong),  # unsigned long long int pc;
+        ("pstate",
+         ctypes.c_ulonglong),  # unsigned long long int pstate;
+    ]
+
+
+class arm_user_regs_struct(ctypes.Structure):  # armv7l general registers: 18 native unsigned long slots
+    _fields_ = [
+        ("r0",
+         ctypes.c_ulong),  # unsigned long int r0;
+        ("r1",
+         ctypes.c_ulong),  # unsigned long int r1;
+        ("r2",
+         ctypes.c_ulong),  # unsigned long int r2;
+        ("r3",
+         ctypes.c_ulong),  # unsigned long int r3;
+        ("r4",
+         ctypes.c_ulong),  # unsigned long int r4;
+        ("r5",
+         ctypes.c_ulong),  # unsigned long int r5;
+        ("r6",
+         ctypes.c_ulong),  # unsigned long int r6;
+        ("r7",
+         ctypes.c_ulong),  # unsigned long int r7;
+        ("r8",
+         ctypes.c_ulong),  # unsigned long int r8;
+        ("r9",
+         ctypes.c_ulong),  # unsigned long int r9;
+        ("r10",
+         ctypes.c_ulong),  # unsigned long int r10;
+        ("fp",
+         ctypes.c_ulong),  # unsigned long int fp;
+        ("ip",
+         ctypes.c_ulong),  # unsigned long int ip;
+        ("sp",
+         ctypes.c_ulong),  # unsigned long int sp;
+        ("lr",
+         ctypes.c_ulong),  # unsigned long int lr;
+        ("pc",
+         ctypes.c_ulong),  # unsigned long int pc;
+        ("cpsr",
+         ctypes.c_ulong),  # unsigned long int cpsr;
+        ("orig_r0",
+         ctypes.c_ulong),  # unsigned long int orig_r0;
+    ]
+
+
 # elf_greg_t    = ctypes.c_ulonglong
 # ELF_NGREG = ctypes.sizeof(user_regs_struct)/ctypes.sizeof(elf_greg_t)
 # elf_gregset_t = elf_greg_t*ELF_NGREG
-elf_gregset_t = user_regs_struct
+user_regs_dict = {
+        "aarch64": aarch64_user_regs_struct,
+        "armv7l": arm_user_regs_struct,
+        "x86_64": x86_64_user_regs_struct,
+}
+
+try:
+    elf_gregset_t = user_regs_dict[MACHINE]
+except KeyError:
+    raise ValueError("Current architecture %s is not supported." % MACHINE)
 
 
 class elf_prstatus(ctypes.Structure):  # struct elf_prstatus
@@ -420,7 +613,7 @@ class elf_prpsinfo(ctypes.Structure):  # struct elf_prpsinfo
     ]
 
 
-class user_fpregs_struct(ctypes.Structure):  # struct user_fpregs_struct
+class x86_64_user_fpregs_struct(ctypes.Structure):  # struct x86_64_user_fpregs_struct
     _fields_ = [
         # unsigned short int cwd;
         ("cwd", ctypes.c_ushort),
@@ -447,7 +640,29 @@ class user_fpregs_struct(ctypes.Structure):  # struct user_fpregs_struct
     ]
 
 
-elf_fpregset_t = user_fpregs_struct
+class aarch64_user_fpregs_struct(ctypes.Structure):  # struct aarch64_user_fpregs_struct
+    _fields_ = [
+        # unsigned long long int vregs[64];
+        ("vregs", ctypes.c_ulonglong * 64),
+        # unsigned int fpsr;
+        ("fpsr", ctypes.c_uint),
+        # unsigned int fpcr;
+        ("fpcr", ctypes.c_uint),
+        # unsigned int padding[2];
+        ("padding", ctypes.c_uint * 2),
+    ]
+
+
+user_fpregs_dict = {
+        "aarch64": aarch64_user_fpregs_struct,
+        "armv7l": None,
+        "x86_64": x86_64_user_fpregs_struct,
+}
+
+try:
+    elf_fpregset_t = user_fpregs_dict[MACHINE]
+except KeyError:
+    raise ValueError("Current architecture %s is not supported." % MACHINE)
 
 # siginfo_t related constants.
 
@@ -842,3 +1057,13 @@ class elf_xsave_struct(ctypes.Structure):  # struct xsave_struct {
         # struct ymmh_struct              ymmh;
         ("ymmh", ymmh_struct)
     ]  # } __aligned(FP_MIN_ALIGN_BYTES) __packed;
+
+
+class vfp_hard_struct(ctypes.Structure):  # struct vfp_hard_struct {
+    _fields_ = [  # fixed-width types keep the layout identical on every host
+        ("vfp_regs", ctypes.c_ulonglong * 32),  # __u64 fpregs[32];
+        ("fpexc", ctypes.c_uint32),  # __u32 fpexc;
+        ("fpscr", ctypes.c_uint32),  # __u32 fpscr;
+        ("fpinst", ctypes.c_uint32),  # __u32 fpinst;
+        ("fpinst2", ctypes.c_uint32),  # __u32 fpinst2;
+    ]  # };
diff --git a/coredump/pycriu b/coredump/pycriu
index d13a8790a..d1b6ed5c4 120000
--- a/coredump/pycriu
+++ b/coredump/pycriu
@@ -1 +1 @@
-../lib/py/
\ No newline at end of file
+../lib/pycriu
\ No newline at end of file
diff --git a/crit/.gitignore b/crit/.gitignore
new file mode 100644
index 000000000..10c8ab186
--- /dev/null
+++ b/crit/.gitignore
@@ -0,0 +1,4 @@
+crit.egg-info/
+build/
+dist/
+version.py
diff --git a/crit/Makefile b/crit/Makefile
index 988b481b6..33bd68eed 100644
--- a/crit/Makefile
+++ b/crit/Makefile
@@ -1,13 +1,25 @@
+VERSION_FILE := $(if $(obj),$(addprefix $(obj)/,crit/version.py),crit/version.py)
 
-all-y	+= crit
+all-y	+= ${VERSION_FILE}
+cleanup-y	+= ${VERSION_FILE}
 
-crit/crit: crit/crit-$(PYTHON)
-	$(Q) cp $^ $@
-crit: crit/crit
-.PHONY: crit
+${VERSION_FILE}:
+	$(Q) echo "__version__ = '${CRIU_VERSION}'" > $@
 
-clean-crit:
-	$(Q) $(RM) crit/crit
-.PHONY: clean-crit
-clean: clean-crit
-mrproper: clean
+install: ${VERSION_FILE}
+ifeq ($(SKIP_PIP_INSTALL),0)
+	$(E) "  INSTALL " crit
+	$(Q) $(PYTHON) -m pip install $(PIPFLAGS) --prefix=$(DESTDIR)$(PREFIX) ./crit
+else
+	$(E) " SKIP INSTALL crit"
+endif
+.PHONY: install
+
+uninstall:
+ifeq ($(SKIP_PIP_INSTALL),0)
+	$(E) " UNINSTALL" crit
+	$(Q) $(PYTHON) ./scripts/uninstall_module.py --prefix=$(DESTDIR)$(PREFIX) crit
+else
+	$(E) " SKIP UNINSTALL crit"
+endif
+.PHONY: uninstall
diff --git a/crit/crit-python2 b/crit/crit-python2
deleted file mode 100755
index b0b7d3c3a..000000000
--- a/crit/crit-python2
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env python2
-
-from pycriu import cli
-
-if __name__ == '__main__':
-	cli.main()
diff --git a/crit/crit/__init__.py b/crit/crit/__init__.py
new file mode 100644
index 000000000..58f3ace6c
--- /dev/null
+++ b/crit/crit/__init__.py
@@ -0,0 +1 @@
+from .version import __version__
diff --git a/lib/py/cli.py b/crit/crit/__main__.py
similarity index 94%
rename from lib/py/cli.py
rename to crit/crit/__main__.py
index 5419384c3..bce523445 100755
--- a/lib/py/cli.py
+++ b/crit/crit/__main__.py
@@ -1,18 +1,17 @@
-from __future__ import print_function
+#!/usr/bin/env python3
 import argparse
 import sys
 import json
 import os
 
 import pycriu
+from . import __version__
 
 
 def inf(opts):
     if opts['in']:
         return open(opts['in'], 'rb')
     else:
-        if (sys.version_info < (3, 0)):
-            return sys.stdin
         if sys.stdin.isatty():
             # If we are reading from a terminal (not a pipe) we want text input and not binary
             return sys.stdin
@@ -28,8 +27,6 @@ def outf(opts, decode):
             mode = 'w+'
         return open(opts['out'], mode)
     else:
-        if (sys.version_info < (3, 0)):
-            return sys.stdout
         if decode:
             return sys.stdout
         return sys.stdout.buffer
@@ -45,9 +42,9 @@ def decode(opts):
     try:
         img = pycriu.images.load(inf(opts), opts['pretty'], opts['nopl'])
     except pycriu.images.MagicException as exc:
-        print("Unknown magic %#x.\n"\
-          "Maybe you are feeding me an image with "\
-          "raw data(i.e. pages.img)?" % exc.magic, file=sys.stderr)
+        print("Unknown magic %#x.\n"
+              "Maybe you are feeding me an image with "
+              "raw data(i.e. pages.img)?" % exc.magic, file=sys.stderr)
         sys.exit(1)
 
     if opts['pretty']:
@@ -63,9 +60,9 @@ def encode(opts):
     try:
         img = json.load(inf(opts))
     except UnicodeDecodeError:
-        print("Cannot read JSON.\n"\
-          "Maybe you are feeding me an image with protobuf data? "\
-          "Encode expects JSON input.", file=sys.stderr)
+        print("Cannot read JSON.\n"
+              "Maybe you are feeding me an image with protobuf data? "
+              "Encode expects JSON input.", file=sys.stderr)
         sys.exit(1)
     pycriu.images.dump(img, outf(opts, False))
 
@@ -135,7 +132,7 @@ def ftype_find_in_files(opts, ft, fid):
     if files_img is None:
         try:
             files_img = pycriu.images.load(dinf(opts, "files.img"))['entries']
-        except:
+        except Exception:
             files_img = []
 
     if len(files_img) == 0:
@@ -326,12 +323,12 @@ def explore_rss(opts):
         pvmi = -1
         for pm in pms[1:]:
             pstr = '\t%lx / %-8d' % (pm['vaddr'], pm['nr_pages'])
-            while vmas[vmi]['end'] <= pm['vaddr']:
+            while vmi < len(vmas) and vmas[vmi]['end'] <= pm['vaddr']:
                 vmi += 1
 
             pme = pm['vaddr'] + (pm['nr_pages'] << 12)
             vstr = ''
-            while vmas[vmi]['start'] < pme:
+            while vmi < len(vmas) and vmas[vmi]['start'] < pme:
                 vma = vmas[vmi]
                 if vmi == pvmi:
                     vstr += ' ~'
@@ -368,6 +365,7 @@ def main():
     desc = 'CRiu Image Tool'
     parser = argparse.ArgumentParser(
         description=desc, formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument('--version', action='version', version=__version__)
 
     subparsers = parser.add_subparsers(
         help='Use crit CMD --help for command-specific help')
@@ -377,8 +375,7 @@ def main():
         'decode', help='convert criu image from binary type to json')
     decode_parser.add_argument(
         '--pretty',
-        help=
-        'Multiline with indents and some numerical fields in field-specific format',
+        help='Multiline with indents and some numerical fields in field-specific format',
         action='store_true')
     decode_parser.add_argument(
         '-i',
diff --git a/crit/pycriu b/crit/pycriu
deleted file mode 120000
index d13a8790a..000000000
--- a/crit/pycriu
+++ /dev/null
@@ -1 +0,0 @@
-../lib/py/
\ No newline at end of file
diff --git a/crit/pyproject.toml b/crit/pyproject.toml
new file mode 100644
index 000000000..f0b185eb7
--- /dev/null
+++ b/crit/pyproject.toml
@@ -0,0 +1,22 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "crit"
+description = "CRiu Image Tool"
+authors = [
+    {name = "CRIU team", email = "criu@lists.linux.dev"},
+]
+license = {text = "GPLv2"}
+dynamic = ["version"]
+requires-python = ">=3.6"
+
+[project.scripts]
+crit = "crit.__main__:main"
+
+[tool.setuptools]
+packages = ["crit"]
+
+[tool.setuptools.dynamic]
+version = {attr = "crit.__version__"}
diff --git a/crit/setup.cfg b/crit/setup.cfg
new file mode 100644
index 000000000..37895923f
--- /dev/null
+++ b/crit/setup.cfg
@@ -0,0 +1,20 @@
+# Configuring setuptools using pyproject.toml files was introduced in setuptools 61.0.0
+# https://setuptools.pypa.io/en/latest/history.html#v61-0-0
+# For older versions of setuptools, we need to use the setup.cfg file
+# https://setuptools.pypa.io/en/latest/userguide/declarative_config.html#declarative-config
+
+[metadata]
+name = crit
+description = CRiu Image Tool
+author = CRIU team
+author_email = criu@lists.linux.dev
+license = GPLv2
+version = attr: crit.__version__
+
+[options]
+packages = crit
+python_requires = >=3.6
+
+[options.entry_points]
+console_scripts =
+    crit = crit.__main__:main
diff --git a/coredump/coredump-python3 b/crit/setup.py
old mode 100755
new mode 100644
similarity index 55%
rename from coredump/coredump-python3
rename to crit/setup.py
index 3032dbadf..618ac1de4
--- a/coredump/coredump-python3
+++ b/crit/setup.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
+import setuptools
 
-import coredump
 
 if __name__ == '__main__':
-    coredump.main()
+    setuptools.setup()
diff --git a/criu/Makefile b/criu/Makefile
index 55bdb1b7a..bafdd980b 100644
--- a/criu/Makefile
+++ b/criu/Makefile
@@ -85,7 +85,7 @@ $(obj)/%: pie
 
 $(obj)/criu: $(PROGRAM-BUILTINS)
 	$(call msg-link, $@)
-	$(Q) $(CC) $(CFLAGS) $^ $(LIBS) $(WRAPFLAGS) $(LDFLAGS) $(GMONLDOPT) -rdynamic -o $@
+	$(Q) $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LIBS) $(WRAPFLAGS) $(GMONLDOPT) -rdynamic -o $@
 
 UNIT-BUILTINS		+= $(obj)/util.o
 UNIT-BUILTINS		+= $(obj)/config.o
@@ -102,7 +102,7 @@ $(obj)/unittest/built-in.o: .FORCE
 
 $(obj)/unittest/unittest: $(UNIT-BUILTINS)
 	$(call msg-link, $@)
-	$(Q) $(CC) $(CFLAGS) $^ $(LIBS) $(WRAPFLAGS) $(LDFLAGS) -rdynamic -o $@
+	$(Q) $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LIBS) $(WRAPFLAGS) -rdynamic -o $@
 
 unittest: $(obj)/unittest/unittest
 	$(Q) $(obj)/unittest/$@
@@ -145,10 +145,8 @@ install: $(obj)/criu
 	$(Q) install -m 644 $(UAPI_HEADERS) $(DESTDIR)$(INCLUDEDIR)/criu/
 	$(Q) mkdir -p $(DESTDIR)$(LIBEXECDIR)/criu/scripts
 	$(Q) install -m 755 scripts/systemd-autofs-restart.sh $(DESTDIR)$(LIBEXECDIR)/criu/scripts
-ifeq ($(PYTHON),python3)
 	$(E) "  INSTALL " scripts/criu-ns
 	$(Q) install -m 755 scripts/criu-ns $(DESTDIR)$(SBINDIR)
-endif
 .PHONY: install
 
 uninstall:
diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
index 22108cce0..ba6132d2f 100644
--- a/criu/Makefile.crtools
+++ b/criu/Makefile.crtools
@@ -74,6 +74,7 @@ obj-y			+= sk-unix.o
 obj-y			+= sockets.o
 obj-y			+= stats.o
 obj-y			+= string.o
+obj-y			+= setproctitle.o
 obj-y			+= sysctl.o
 obj-y			+= sysfs_parse.o
 obj-y			+= timerfd.o
@@ -91,6 +92,8 @@ obj-y			+= servicefd.o
 obj-y			+= pie-util-vdso.o
 obj-y			+= vdso.o
 obj-y			+= timens.o
+obj-y			+= timer.o
+obj-y			+= sigact.o
 obj-$(CONFIG_HAS_LIBBPF)	+= bpfmap.o
 obj-$(CONFIG_COMPAT)	+= pie-util-vdso-elf32.o
 CFLAGS_pie-util-vdso-elf32.o	+= -DCONFIG_VDSO_32
@@ -98,6 +101,7 @@ obj-$(CONFIG_COMPAT)	+= vdso-compat.o
 CFLAGS_REMOVE_vdso-compat.o	+= $(CFLAGS-ASAN) $(CFLAGS-GCOV)
 obj-y			+= pidfd-store.o
 obj-y			+= hugetlb.o
+obj-y			+= pidfd.o
 
 PROTOBUF_GEN := scripts/protobuf-gen.sh
 
diff --git a/criu/Makefile.packages b/criu/Makefile.packages
index 13c346f44..3e2e6efd1 100644
--- a/criu/Makefile.packages
+++ b/criu/Makefile.packages
@@ -6,7 +6,7 @@ REQ-RPM-PKG-NAMES	+= protobuf-devel
 REQ-RPM-PKG-NAMES	+= protobuf-python
 REQ-RPM-PKG-NAMES	+= libnl3-devel
 REQ-RPM-PKG-NAMES	+= libcap-devel
-REQ-RPM-PKG-NAMES	+= $(PYTHON)-future
+REQ-RPM-PKG-NAMES	+= libuuid-devel
 
 REQ-RPM-PKG-TEST-NAMES  += libaio-devel
 
@@ -15,22 +15,19 @@ REQ-DEB-PKG-NAMES	+= libprotobuf-c-dev
 REQ-DEB-PKG-NAMES	+= protobuf-c-compiler
 REQ-DEB-PKG-NAMES	+= protobuf-compiler
 REQ-DEB-PKG-NAMES	+= $(PYTHON)-protobuf
-REQ-DEB-PKG-NAMES	+= $(PYTHON)-future
 REQ-DEB-PKG-NAMES	+= libnl-3-dev
 REQ-DEB-PKG-NAMES	+= libcap-dev
+REQ-DEB-PKG-NAMES	+= uuid-dev
 
 REQ-DEB-PKG-TEST-NAMES	+= $(PYTHON)-yaml
 REQ-DEB-PKG-TEST-NAMES	+= libaio-dev
 
-ifeq ($(PYTHON),python3)
 REQ-DEB-PKG-TEST-NAMES	+= libaio-dev
 
 REQ-RPM-PKG-TEST-NAMES	+= $(PYTHON)-PyYAML
-else
-REQ-RPM-PKG-TEST-NAMES	+= $(PYTHON)-pyyaml
-endif
 
-export LIBS		+= -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet
+
+export LIBS		+= -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet -luuid
 
 check-packages-failed:
 	$(warning Can not find some of the required libraries)
diff --git a/criu/action-scripts.c b/criu/action-scripts.c
index 1ce6d9c10..6f7900186 100644
--- a/criu/action-scripts.c
+++ b/criu/action-scripts.c
@@ -18,6 +18,7 @@
 #include "common/scm.h"
 
 static const char *action_names[ACT_MAX] = {
+	[ACT_PRE_STREAM] = "pre-stream",
 	[ACT_PRE_DUMP] = "pre-dump",
 	[ACT_POST_DUMP] = "post-dump",
 	[ACT_PRE_RESTORE] = "pre-restore",
@@ -30,6 +31,7 @@ static const char *action_names[ACT_MAX] = {
 	[ACT_POST_RESUME] = "post-resume",
 	[ACT_ORPHAN_PTS_MASTER] = "orphan-pts-master",
 	[ACT_STATUS_READY] = "status-ready",
+	[ACT_QUERY_EXT_FILES] = "query-ext-files",
 };
 
 struct script {
@@ -51,6 +53,9 @@ static int run_shell_scripts(const char *action)
 #define ENV_IMGDIR  0x1
 #define ENV_ROOTPID 0x2
 
+	if (list_empty(&scripts))
+		return 0;
+
 	if (setenv("CRTOOLS_SCRIPT_ACTION", action, 1)) {
 		pr_perror("Can't set CRTOOLS_SCRIPT_ACTION=%s", action);
 		return -1;
@@ -111,6 +116,20 @@ int rpc_send_fd(enum script_actions act, int fd)
 	return send_criu_rpc_script(act, (char *)action, rpc_sk, fd);
 }
 
+int rpc_query_external_files(void)
+{
+	int rpc_sk;
+
+	if (scripts_mode != SCRIPTS_RPC)
+		return 0;
+
+	rpc_sk = get_service_fd(RPC_SK_OFF);
+	if (rpc_sk < 0)
+		return -1;
+
+	return exec_rpc_query_external_files((char *)action_names[ACT_QUERY_EXT_FILES], rpc_sk);
+}
+
 int run_scripts(enum script_actions act)
 {
 	int ret = 0;
@@ -118,23 +137,24 @@ int run_scripts(enum script_actions act)
 
 	pr_debug("Running %s scripts\n", action);
 
-	if (scripts_mode == SCRIPTS_NONE)
+	switch (scripts_mode) {
+	case SCRIPTS_NONE:
 		return 0;
-
-	if (scripts_mode == SCRIPTS_RPC) {
+	case SCRIPTS_RPC:
 		ret = rpc_send_fd(act, -1);
-		goto out;
-	}
-
-	if (scripts_mode == SCRIPTS_SHELL) {
+		if (ret)
+			break;
+		/* Enable scripts from config file in RPC mode (fallthrough) */
+	case SCRIPTS_SHELL:
 		ret = run_shell_scripts(action);
-		goto out;
+		break;
+	default:
+		BUG();
 	}
 
-	BUG();
-out:
 	if (ret)
 		pr_err("One of more action scripts failed\n");
+
 	return ret;
 }
 
@@ -142,8 +162,9 @@ int add_script(char *path)
 {
 	struct script *script;
 
-	BUG_ON(scripts_mode == SCRIPTS_RPC);
-	scripts_mode = SCRIPTS_SHELL;
+	/* Set shell mode when a script is added but don't overwrite RPC mode */
+	if (scripts_mode == SCRIPTS_NONE)
+		scripts_mode = SCRIPTS_SHELL;
 
 	script = xmalloc(sizeof(struct script));
 	if (script == NULL)
@@ -169,7 +190,6 @@ int add_rpc_notify(int sk)
 		return -1;
 	}
 
-	BUG_ON(scripts_mode == SCRIPTS_SHELL);
 	scripts_mode = SCRIPTS_RPC;
 
 	if (install_service_fd(RPC_SK_OFF, fd) < 0)
diff --git a/criu/apparmor.c b/criu/apparmor.c
index 67553c8f1..48b639216 100644
--- a/criu/apparmor.c
+++ b/criu/apparmor.c
@@ -108,7 +108,7 @@ static int collect_profile(char *path, int offset, char *dir, AaNamespace *ns)
 		return -1;
 	aa_policy__init(cur);
 
-	strlcat(path + my_offset, "name", PATH_MAX - my_offset);
+	__strlcat(path + my_offset, "name", PATH_MAX - my_offset);
 	f = fopen(path, "r");
 	if (!f) {
 		xfree(cur);
@@ -124,7 +124,7 @@ static int collect_profile(char *path, int offset, char *dir, AaNamespace *ns)
 		return -1;
 	}
 
-	strlcpy(path + my_offset, "raw_data", PATH_MAX - my_offset);
+	__strlcpy(path + my_offset, "raw_data", PATH_MAX - my_offset);
 	fd = open(path, O_RDONLY);
 	if (fd < 0) {
 		pr_perror("failed to open aa policy %s", path);
@@ -207,8 +207,6 @@ static int by_time(const struct dirent **de1, const struct dirent **de2)
 	} else {
 		if (sb1.st_mtim.tv_sec < sb2.st_mtim.tv_sec)
 			return -1;
-		if (sb1.st_mtim.tv_sec == sb2.st_mtim.tv_sec)
-			return 0;
 		return 1;
 	}
 }
@@ -471,6 +469,7 @@ static void *get_suspend_policy(char *name, off_t *len)
 	ret = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
 	if (ret == MAP_FAILED) {
 		pr_perror("mmap of %s failed", file);
+		ret = NULL;
 		goto out;
 	}
 
@@ -520,13 +519,13 @@ static int write_aa_policy(AaNamespace *ns, char *path, int offset, char *rewrit
 
 			tmp = *end;
 			*end = 0;
-			strlcpy(namespace, rewrite_pos + 1, sizeof(namespace));
+			__strlcpy(namespace, rewrite_pos + 1, sizeof(namespace));
 			*end = tmp;
 
 			break;
 		}
 		default:
-			strlcpy(namespace, ns->name, sizeof(namespace));
+			__strlcpy(namespace, ns->name, sizeof(namespace));
 			for (i = 0; i < ns->n_policies; i++) {
 				if (strcmp(ns->policies[i]->name, rewrite_pos))
 					pr_warn("binary rewriting of apparmor policies not supported right now, not renaming %s to %s\n",
@@ -551,8 +550,8 @@ static int write_aa_policy(AaNamespace *ns, char *path, int offset, char *rewrit
 			goto fail;
 	}
 
-	ret = snprintf(path + offset + my_offset, sizeof(path) - offset - my_offset, "/.replace");
-	if (ret < 0 || ret >= sizeof(path) - offset - my_offset) {
+	ret = snprintf(path + offset + my_offset, PATH_MAX - offset - my_offset, "/.replace");
+	if (ret < 0 || ret >= PATH_MAX - offset - my_offset) {
 		pr_err("snprintf failed\n");
 		goto fail;
 	}
diff --git a/criu/arch/aarch64/Makefile b/criu/arch/aarch64/Makefile
index b26487367..b87fcaa5b 100644
--- a/criu/arch/aarch64/Makefile
+++ b/criu/arch/aarch64/Makefile
@@ -6,3 +6,4 @@ obj-y			+= cpu.o
 obj-y			+= crtools.o
 obj-y			+= sigframe.o
 obj-y			+= bitops.o
+obj-y			+= gcs.o
\ No newline at end of file
diff --git a/criu/arch/aarch64/crtools.c b/criu/arch/aarch64/crtools.c
index e87b8629a..2e89f9ce3 100644
--- a/criu/arch/aarch64/crtools.c
+++ b/criu/arch/aarch64/crtools.c
@@ -1,5 +1,6 @@
 #include <string.h>
 #include <unistd.h>
+#include <linux/auxvec.h>
 
 #include <linux/elf.h>
 
@@ -11,6 +12,7 @@
 #include "common/compiler.h"
 #include <compel/ptrace.h>
 #include "asm/dump.h"
+#include "asm/gcs-types.h"
 #include "protobuf.h"
 #include "images/core.pb-c.h"
 #include "images/creds.pb-c.h"
@@ -20,12 +22,137 @@
 #include "cpu.h"
 #include "restorer.h"
 #include "compel/infect.h"
+#include "pstree.h"
+#include <stdbool.h>
+
+/*
+ * cr_user_pac_* are a copy of the corresponding uapi structs
+ * in arch/arm64/include/uapi/asm/ptrace.h
+ */
+struct cr_user_pac_address_keys {
+	__uint128_t apiakey;
+	__uint128_t apibkey;
+	__uint128_t apdakey;
+	__uint128_t apdbkey;
+};
+
+struct cr_user_pac_generic_keys {
+	__uint128_t apgakey;
+};
+
+/*
+ * The following HWCAP constants are copied from
+ * arch/arm64/include/uapi/asm/hwcap.h
+ */
+#ifndef HWCAP_PACA
+#define HWCAP_PACA (1 << 30)
+#endif
+
+#ifndef HWCAP_PACG
+#define HWCAP_PACG (1UL << 31)
+#endif
+
+/*
+ * The following NT_ARM_PAC constants are copied from
+ * include/uapi/linux/elf.h
+ */
+#ifndef NT_ARM_PACA_KEYS
+#define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */
+#endif
+
+#ifndef NT_ARM_PACG_KEYS
+#define NT_ARM_PACG_KEYS 0x408
+#endif
+
+#ifndef NT_ARM_PAC_ENABLED_KEYS
+#define NT_ARM_PAC_ENABLED_KEYS	0x40a	/* AArch64 pointer authentication enabled keys. */
+#endif
+
+extern unsigned long getauxval(unsigned long type);
 
 #define assign_reg(dst, src, e) dst->e = (__typeof__(dst->e))(src)->e
 
-int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpsimd)
+/* Dump the PAC keys of @pid into core->ti_aarch64->pac_keys; returns 0 on success, -1 on error. */
+static int save_pac_keys(int pid, CoreEntry *core)
+{
+	struct cr_user_pac_address_keys paca;
+	struct cr_user_pac_generic_keys pacg;
+	PacKeys *pac_entry;
+	long pac_enabled_key;
+	struct iovec iov;
+	int ret;
+	/* AT_HWCAP of the dumping process decides which key sets are read below. */
+	unsigned long hwcaps = getauxval(AT_HWCAP);
+
+	pac_entry = xmalloc(sizeof(PacKeys));
+	if (!pac_entry)
+		return -1;
+	core->ti_aarch64->pac_keys = pac_entry;
+	pac_keys__init(pac_entry);
+
+	if (hwcaps & HWCAP_PACA) {
+		PacAddressKeys *pac_address_keys;
+
+		pr_debug("%d: Dumping address authentication keys\n", pid);
+		iov.iov_base = &paca;
+		iov.iov_len = sizeof(paca);
+		if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_ARM_PACA_KEYS, &iov))) {
+			pr_perror("Failed to get address authentication key for %d", pid);
+			return -1;
+		}
+		pac_address_keys = xmalloc(sizeof(PacAddressKeys));
+		if (!pac_address_keys)
+			return -1;
+		pac_address_keys__init(pac_address_keys);
+		pac_entry->pac_address_keys = pac_address_keys;
+		pac_address_keys->apiakey_lo = paca.apiakey;
+		pac_address_keys->apiakey_hi = paca.apiakey >> 64;
+		pac_address_keys->apibkey_lo = paca.apibkey;
+		pac_address_keys->apibkey_hi = paca.apibkey >> 64;
+		pac_address_keys->apdakey_lo = paca.apdakey;
+		pac_address_keys->apdakey_hi = paca.apdakey >> 64;
+		pac_address_keys->apdbkey_lo = paca.apdbkey;
+		pac_address_keys->apdbkey_hi = paca.apdbkey >> 64;
+
+		iov.iov_base = &pac_enabled_key;
+		iov.iov_len = sizeof(pac_enabled_key);
+		ret = ptrace(PTRACE_GETREGSET, pid, NT_ARM_PAC_ENABLED_KEYS, &iov);
+		if (ret) {
+			pr_perror("Failed to get authentication key mask for %d", pid);
+			return -1;
+		}
+
+		pac_address_keys->pac_enabled_key = pac_enabled_key;
+	}
+	if (hwcaps & HWCAP_PACG) {
+		PacGenericKeys *pac_generic_keys;
+
+		pr_debug("%d: Dumping generic authentication keys\n", pid);
+		iov.iov_base = &pacg;
+		iov.iov_len = sizeof(pacg);
+		if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_ARM_PACG_KEYS, &iov))) {
+			pr_perror("Failed to get a generic authentication key for %d", pid);
+			return -1;
+		}
+		pac_generic_keys = xmalloc(sizeof(PacGenericKeys));
+		if (!pac_generic_keys)
+			return -1;
+		pac_generic_keys__init(pac_generic_keys);
+		pac_entry->pac_generic_keys = pac_generic_keys;
+		pac_generic_keys->apgakey_lo = pacg.apgakey;
+		pac_generic_keys->apgakey_hi = pacg.apgakey >> 64;
+	}
+	return 0;
+}
+
+int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpsimd)
 {
 	int i;
+	struct cr_user_gcs gcs_live;
+	struct iovec gcs_iov = {
+		.iov_base = &gcs_live,
+		.iov_len = sizeof(gcs_live),
+	};
 	CoreEntry *core = x;
 
 	// Save the Aarch64 CPU state
@@ -37,11 +164,24 @@ int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpsi
 
 	// Save the FP/SIMD state
 	for (i = 0; i < 32; ++i) {
-		core->ti_aarch64->fpsimd->vregs[2 * i] = fpsimd->vregs[i];
-		core->ti_aarch64->fpsimd->vregs[2 * i + 1] = fpsimd->vregs[i] >> 64;
+		core->ti_aarch64->fpsimd->vregs[2 * i] = fpsimd->fpstate.vregs[i];
+		core->ti_aarch64->fpsimd->vregs[2 * i + 1] = fpsimd->fpstate.vregs[i] >> 64;
+	}
+	assign_reg(core->ti_aarch64->fpsimd, &fpsimd->fpstate, fpsr);
+	assign_reg(core->ti_aarch64->fpsimd, &fpsimd->fpstate, fpcr);
+
+	if (save_pac_keys(pid, core))
+		return -1;
+
+	/* Save the GCS state */
+	if (compel_host_supports_gcs()) {
+		if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) < 0) {
+			pr_perror("Failed to get GCS for %d", pid);
+			return -1;
+		}
+		core->ti_aarch64->gcs->gcspr_el0 = gcs_live.gcspr_el0;
+		core->ti_aarch64->gcs->features_enabled = gcs_live.features_enabled;
 	}
-	assign_reg(core->ti_aarch64->fpsimd, fpsimd, fpsr);
-	assign_reg(core->ti_aarch64->fpsimd, fpsimd, fpcr);
 
 	return 0;
 }
@@ -51,6 +191,7 @@ int arch_alloc_thread_info(CoreEntry *core)
 	ThreadInfoAarch64 *ti_aarch64;
 	UserAarch64RegsEntry *gpregs;
 	UserAarch64FpsimdContextEntry *fpsimd;
+	UserAarch64GcsEntry *gcs;
 
 	ti_aarch64 = xmalloc(sizeof(*ti_aarch64));
 	if (!ti_aarch64)
@@ -80,6 +221,15 @@ int arch_alloc_thread_info(CoreEntry *core)
 	if (!fpsimd->vregs)
 		goto err;
 
+	/* Allocate & init GCS */
+	if (compel_host_supports_gcs()) {
+		gcs = xmalloc(sizeof(*gcs));
+		if (!gcs)
+			goto err;
+		user_aarch64_gcs_entry__init(gcs);
+		ti_aarch64->gcs = gcs;
+	}
+
 	return 0;
 err:
 	return -1;
@@ -92,6 +242,12 @@ void arch_free_thread_info(CoreEntry *core)
 			xfree(CORE_THREAD_ARCH_INFO(core)->fpsimd->vregs);
 			xfree(CORE_THREAD_ARCH_INFO(core)->fpsimd);
 		}
+		if (CORE_THREAD_ARCH_INFO(core)->pac_keys) { /* free both key sets, then their container */
+			xfree(CORE_THREAD_ARCH_INFO(core)->pac_keys->pac_address_keys);
+			xfree(CORE_THREAD_ARCH_INFO(core)->pac_keys->pac_generic_keys);
+			xfree(CORE_THREAD_ARCH_INFO(core)->pac_keys);
+		}
+		xfree(CORE_THREAD_ARCH_INFO(core)->gcs); /* from arch_alloc_thread_info(); NOTE(review): assumes NULL when never allocated */
 		xfree(CORE_THREAD_ARCH_INFO(core)->gpregs->regs);
 		xfree(CORE_THREAD_ARCH_INFO(core)->gpregs);
 		xfree(CORE_THREAD_ARCH_INFO(core));
@@ -103,6 +259,7 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core)
 {
 	int i;
 	struct fpsimd_context *fpsimd = RT_SIGFRAME_FPU(sigframe);
+	struct gcs_context *gcs;
 
 	if (core->ti_aarch64->fpsimd->n_vregs != 64)
 		return 1;
@@ -116,6 +273,18 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core)
 	fpsimd->head.magic = FPSIMD_MAGIC;
 	fpsimd->head.size = sizeof(*fpsimd);
 
+	/* The image may carry no GCS entry (NULL); fill the record only when it does. NOTE(review): confirm an unfilled record is fine. */
+	if (compel_host_supports_gcs() && core->ti_aarch64->gcs) {
+		gcs = RT_SIGFRAME_GCS(sigframe);
+		gcs->head.magic = GCS_MAGIC;
+		gcs->head.size = sizeof(*gcs);
+		gcs->reserved = 0;
+		gcs->gcspr = core->ti_aarch64->gcs->gcspr_el0 - 8;
+		gcs->features_enabled = core->ti_aarch64->gcs->features_enabled;
+		/* Log only after the record is filled, so the values printed are the ones just written. */
+		pr_debug("sigframe gcspr %llx enabled %llx\n", gcs->gcspr, gcs->features_enabled);
+	}
+
 	return 0;
 }
 
@@ -135,3 +304,83 @@ int restore_gpregs(struct rt_sigframe *f, UserRegsEntry *r)
 
 	return 0;
 }
+
+int arch_ptrace_restore(int pid, struct pstree_item *item)
+{
+	unsigned long hwcaps = getauxval(AT_HWCAP);
+	struct cr_user_pac_address_keys upaca;
+	struct cr_user_pac_generic_keys upacg;
+	PacAddressKeys *paca;
+	PacGenericKeys *pacg;
+	long pac_enabled_keys;
+	struct iovec iov;
+	int ret;
+
+	/* Write the PAC keys cached in rsti(item) into task @pid via ptrace: 0 on success, 1 on failure. */
+	pr_debug("%d: Restoring PAC keys\n", pid);
+
+	paca = &rsti(item)->arch_info.pac_address_keys;
+	pacg = &rsti(item)->arch_info.pac_generic_keys;
+	if (rsti(item)->arch_info.has_paca) {
+		if (!(hwcaps & HWCAP_PACA)) { /* HWCAP_PACA is missing from this process's AT_HWCAP */
+			pr_err("PACA support is required from the source system.\n");
+			return 1;
+		}
+		pac_enabled_keys = rsti(item)->arch_info.pac_address_keys.pac_enabled_key;
+		/* Each 128-bit key is kept as two 64-bit halves (_lo/_hi); join them back together. */
+		upaca.apiakey = paca->apiakey_lo + ((__uint128_t)paca->apiakey_hi << 64);
+		upaca.apibkey = paca->apibkey_lo + ((__uint128_t)paca->apibkey_hi << 64);
+		upaca.apdakey = paca->apdakey_lo + ((__uint128_t)paca->apdakey_hi << 64);
+		upaca.apdbkey = paca->apdbkey_lo + ((__uint128_t)paca->apdbkey_hi << 64);
+
+		iov.iov_base = &upaca;
+		iov.iov_len = sizeof(upaca);
+
+		if ((ret = ptrace(PTRACE_SETREGSET, pid, NT_ARM_PACA_KEYS, &iov))) {
+			pr_perror("Failed to set address authentication keys for %d", pid);
+			return 1;
+		}
+		iov.iov_base = &pac_enabled_keys;
+		iov.iov_len = sizeof(pac_enabled_keys);
+		if ((ret = ptrace(PTRACE_SETREGSET, pid, NT_ARM_PAC_ENABLED_KEYS, &iov))) {
+			pr_perror("Failed to set enabled key mask for %d", pid);
+			return 1;
+		}
+	}
+
+	if (rsti(item)->arch_info.has_pacg) {
+		if (!(hwcaps & HWCAP_PACG)) {
+			pr_err("PACG support is required from the source system.\n");
+			return 1;
+		}
+		upacg.apgakey = pacg->apgakey_lo + ((__uint128_t)pacg->apgakey_hi << 64);
+		iov.iov_base = &upacg;
+		iov.iov_len = sizeof(upacg);
+		if ((ret = ptrace(PTRACE_SETREGSET, pid, NT_ARM_PACG_KEYS, &iov))) {
+			pr_perror("Failed to set the generic authentication key for %d", pid);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+void arch_rsti_init(struct pstree_item *p)
+{
+	PacKeys *keys = p->core[0]->ti_aarch64->pac_keys;
+	bool with_paca = keys && keys->pac_address_keys;
+	bool with_pacg = keys && keys->pac_generic_keys;
+
+	/*
+	 * Record which PAC key sets core[0] of the image carries and keep
+	 * a by-value copy of each one that is present in rsti(p)->arch_info.
+	 * A missing pac_keys entry leaves both flags false.
+	 */
+	rsti(p)->arch_info.has_paca = with_paca;
+	rsti(p)->arch_info.has_pacg = with_pacg;
+
+	if (with_paca)
+		rsti(p)->arch_info.pac_address_keys = *keys->pac_address_keys;
+	if (with_pacg)
+		rsti(p)->arch_info.pac_generic_keys = *keys->pac_generic_keys;
+}
diff --git a/criu/arch/aarch64/gcs.c b/criu/arch/aarch64/gcs.c
new file mode 100644
index 000000000..4bdb9d2e4
--- /dev/null
+++ b/criu/arch/aarch64/gcs.c
@@ -0,0 +1,157 @@
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+
+#include <common/list.h>
+#include <compel/cpu.h>
+
+#include "asm/gcs-types.h"
+#include "pstree.h"
+#include "restorer.h"
+#include "rst-malloc.h"
+#include "vma.h"
+
+#include <sys/auxv.h>
+#include <stdbool.h>
+
+static bool task_has_gcs_enabled(UserAarch64GcsEntry *gcs)
+{
+	return gcs ? (gcs->features_enabled & PR_SHADOW_STACK_ENABLE) != 0 : false; /* no entry counts as "not enabled" */
+}
+
+static bool host_supports_gcs(void)
+{
+	/* True when the HWCAP_GCS bit is set in this process's AT_HWCAP auxv entry. */
+	return !!(getauxval(AT_HWCAP) & HWCAP_GCS);
+}
+
+static bool task_needs_gcs(struct pstree_item *item, CoreEntry *core)
+{
+	/*
+	 * True only for a live task whose image has GCS enabled, restored on a host with HWCAP_GCS.
+	 */
+	if (!task_alive(item))
+		return false;
+
+	if (!task_has_gcs_enabled(core->ti_aarch64->gcs)) {
+		pr_info("Restoring a task without GCS\n");
+		return false;
+	}
+
+	if (!host_supports_gcs()) {
+		/* The image has GCS enabled, but this host does not advertise it. */
+		pr_warn_once("Restoring task with GCS on non-GCS host\n");
+		return false;
+	}
+
+	pr_info("Restoring task with GCS\n");
+	return true;
+}
+
+static int gcs_prepare_task(struct vm_area_list *vmas,
+			    struct rst_shstk_info *gcs)
+{
+	struct vma_area *area;
+
+	/* Find the shadow-stack VMA that contains gcs->gcspr_el0 and record its start, size and premapped address. */
+	list_for_each_entry(area, &vmas->h, list) {
+		if (!vma_area_is(area, VMA_AREA_SHSTK))
+			continue;
+		if (!in_vma_area(area, gcs->gcspr_el0))
+			continue;
+
+		gcs->vma_start = area->e->start;
+		gcs->vma_size = vma_area_len(area);
+		gcs->premapped_addr = area->premmaped_addr;
+		return 0;
+	}
+
+	/* No VMA_AREA_SHSTK area covers the saved pointer. */
+	pr_err("Unable to find a shadow stack vma: %lx\n", gcs->gcspr_el0);
+	return -1;
+}
+
+int arch_gcs_prepare(struct pstree_item *item, CoreEntry *core,
+		     struct task_restore_args *ta)
+{
+	int i;
+	struct thread_restore_args *args_array = (struct thread_restore_args *)(&ta[1]); /* per-thread args are read from directly behind *ta */
+	struct vm_area_list *vmas = &rsti(item)->vmas;
+	struct rst_shstk_info *gcs = &ta->shstk;
+
+	if (!task_needs_gcs(item, core))
+		return 0; /* nothing to prepare; not an error */
+	/* Task-wide info: copy the GCS pointer and flags from @core, then locate the backing VMA. */
+	gcs->gcspr_el0 = core->ti_aarch64->gcs->gcspr_el0;
+	gcs->features_enabled = core->ti_aarch64->gcs->features_enabled;
+
+	if (gcs_prepare_task(vmas, gcs)) {
+		pr_err("gcs: failed to prepare shadow stack memory\n");
+		return -1;
+	}
+	/* Same again for every thread, using that thread's own core entry and shstk slot. */
+	for (i = 0; i < item->nr_threads; i++) {
+		struct thread_restore_args *thread_args = &args_array[i];
+
+		core = item->core[i];
+		gcs = &thread_args->shstk;
+
+		gcs->gcspr_el0 = core->ti_aarch64->gcs->gcspr_el0;
+		gcs->features_enabled = core->ti_aarch64->gcs->features_enabled;
+
+		if (gcs_prepare_task(vmas, gcs)) {
+			pr_err("gcs: failed to prepare GCS memory\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
+			  int (*func)(void *arg), void *arg)
+{
+	int fret;
+	unsigned long flags = PR_SHADOW_STACK_ENABLE |
+			      PR_SHADOW_STACK_PUSH |
+			      PR_SHADOW_STACK_WRITE;
+	/* Runs func(arg); if the task needs GCS, GCS is enabled first and the process exits with func's result. */
+	long ret, x1_after, x8_after;
+
+	/* If task doesn't need GCS, just call func */
+	if (!task_needs_gcs(item, core)) {
+		return func(arg);
+	}
+
+	pr_debug("gcs: GCS enable SVC about to fire: x8=%d x0=%d x1=0x%lx\n",
+		 __NR_prctl, PR_SET_SHADOW_STACK_STATUS, flags);
+
+	asm volatile(
+		"mov x0, %3\n"	// x0 = PR_SET_SHADOW_STACK_STATUS (75)
+		"mov x1, %4\n"	// x1 = flags
+		"mov x2, xzr\n" // x2 = 0
+		"mov x3, xzr\n" // x3 = 0
+		"mov x4, xzr\n" // x4 = 0
+		"mov x8, %5\n"	// x8 = __NR_prctl (167)
+		"svc #0\n"	// Invoke syscall
+		"mov %0, x0\n"	// Capture return value
+		"mov %1, x1\n"	// Capture x1 after
+		"mov %2, x8\n"	// Capture x8 after
+		: "=r"(ret), "=r"(x1_after), "=r"(x8_after)
+		: "i"(PR_SET_SHADOW_STACK_STATUS), // x0 - %3rd
+		  "r"(flags),			   // x1 - %4th
+		  "i"(__NR_prctl)		   // x8 - %5th
+		: "x0", "x1", "x2", "x3", "x4", "x8", "memory", "cc");
+
+	pr_info("gcs: after SVC: ret=%ld x1=%ld x8=%ld\n", ret, x1_after, x8_after);
+	/* The raw SVC bypasses libc, so errno is not updated; a failure is the negated error code left in x0. */
+	if (ret != 0) {
+		int err = (int)-ret;
+		pr_err("gcs: failed to enable GCS: ret=%ld errno=%d (%s)\n", ret, err, strerror(err));
+		return -1;
+	}
+	/* NOTE(review): func's result goes to exit() rather than being returned, presumably because returning through this frame is unsafe once GCS is on; confirm. */
+	fret = func(arg);
+	exit(fret);
+
+	return -1; /* not reached: exit() does not return */
+}
diff --git a/criu/arch/aarch64/include/asm/dump.h b/criu/arch/aarch64/include/asm/dump.h
index 90cd8bca8..ecab061c3 100644
--- a/criu/arch/aarch64/include/asm/dump.h
+++ b/criu/arch/aarch64/include/asm/dump.h
@@ -1,7 +1,7 @@
 #ifndef __CR_ASM_DUMP_H__
 #define __CR_ASM_DUMP_H__
 
-extern int save_task_regs(void *, user_regs_struct_t *, user_fpregs_struct_t *);
+extern int save_task_regs(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *);
 extern int arch_alloc_thread_info(CoreEntry *core);
 extern void arch_free_thread_info(CoreEntry *core);
 
diff --git a/criu/arch/aarch64/include/asm/gcs.h b/criu/arch/aarch64/include/asm/gcs.h
new file mode 100644
index 000000000..28faa23b7
--- /dev/null
+++ b/criu/arch/aarch64/include/asm/gcs.h
@@ -0,0 +1,196 @@
+#ifndef __CR_ASM_GCS_H__
+#define __CR_ASM_GCS_H__
+
+#include <asm/gcs-types.h>
+
+struct rst_shstk_info {
+	unsigned long vma_start;      /* start of GCS VMA */
+	unsigned long vma_size;	      /* size of GCS VMA */
+	unsigned long premapped_addr; /* premapped buffer */
+	unsigned long tmp_gcs;	      /* temp area for GCS if needed */
+	u64 gcspr_el0;		      /* GCS pointer */
+	u64 features_enabled;	      /* GCS flags */
+};
+
+#define rst_shstk_info rst_shstk_info
+
+struct task_restore_args;
+struct pstree_item;
+
+int arch_gcs_prepare(struct pstree_item *item, CoreEntry *core,
+		     struct task_restore_args *ta);
+#define arch_shstk_prepare arch_gcs_prepare
+
+int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
+			  int (*func)(void *arg), void *arg);
+#define arch_shstk_trampoline arch_shstk_trampoline
+
+static always_inline void shstk_set_restorer_stack(struct rst_shstk_info *gcs, void *ptr)
+{
+	gcs->tmp_gcs = (long unsigned)ptr;
+}
+#define shstk_set_restorer_stack shstk_set_restorer_stack
+
+static always_inline long shstk_restorer_stack_size(void)
+{
+	return PAGE_SIZE;
+}
+#define shstk_restorer_stack_size shstk_restorer_stack_size
+
+#ifdef CR_NOGLIBC
+#include <compel/plugins/std/syscall.h>
+#include <compel/cpu.h>
+#include "vma.h"
+
+static inline unsigned long gcs_map(unsigned long addr, unsigned long size, unsigned int flags)
+{
+	long gcspr = sys_map_shadow_stack(addr, size, flags);
+	pr_info("gcs: syscall: map_shadow_stack at=%lx size=%ld\n", addr, size);
+	/* Wrapper around sys_map_shadow_stack(): returns the mapped address, or (unsigned long)-1 on any failure. */
+	if (gcspr < 0) {
+		pr_err("gcs: failed to map GCS at %lx: %ld\n", addr, gcspr);
+		return -1;
+	}
+	/* A non-zero @addr is a placement requirement: getting any other address is treated as failure. */
+	if (addr && gcspr != addr) {
+		pr_err("gcs: address mismatch: need %lx, got %lx\n", addr, gcspr);
+		return -1;
+	}
+
+	pr_info("gcs: mmapped GCS at %lx\n", gcspr);
+
+	return gcspr;
+}
+
+/* clang-format off */
+static always_inline void gcsss1(unsigned long *Xt)
+{
+	asm volatile (
+		"sys #3, C7, C7, #2, %0\n"
+		:
+		: "rZ" (Xt)
+		: "memory");
+}
+
+static always_inline unsigned long *gcsss2(void)
+{
+	unsigned long *Xt;
+
+	asm volatile (
+		"SYSL %0, #3, C7, C7, #3\n"
+		: "=r" (Xt)
+		:
+		: "memory");
+
+	return Xt;
+}
+
+static inline void gcsstr(unsigned long addr, unsigned long val)
+{
+	asm volatile(
+		"mov x0, %0\n"
+		"mov x1, %1\n"
+		".inst 0xd91f1c01\n"  // GCSSTR x1, [x0]
+		"mov x0, #0\n"
+		:
+		: "r"(addr), "r"(val)
+		: "x0", "x1", "memory");
+}
+/* clang-format on */
+
+static always_inline int gcs_restore(struct rst_shstk_info *gcs)
+{
+	unsigned long gcspr, val;
+	/* Nothing to do unless PR_SHADOW_STACK_ENABLE is set in @gcs; a NULL @gcs is accepted too. */
+	if (!(gcs && gcs->features_enabled & PR_SHADOW_STACK_ENABLE)) {
+		return 0;
+	}
+
+	gcspr = gcs->gcspr_el0 - 8;
+	/* First entry: written at gcspr_el0 - 8. */
+	val = ALIGN_DOWN(GCS_SIGNAL_CAP(gcspr), 8);
+	pr_debug("gcs: [0] GCSSTR VAL=%lx write at GCSPR=%lx\n", val, gcspr);
+	gcsstr(gcspr, val);
+	/* Second entry: same value with GCS_CAP_VALID_TOKEN or-ed in (computed before the decrement), written 8 bytes lower. */
+	val = ALIGN_DOWN(GCS_SIGNAL_CAP(gcspr), 8) | GCS_CAP_VALID_TOKEN;
+	gcspr -= 8;
+	pr_debug("gcs: [1] GCSSTR VAL=%lx write at GCSPR=%lx\n", val, gcspr);
+	gcsstr(gcspr, val);
+	/* Hand the address of the second entry to gcsss1(); this function always returns 0. */
+	pr_debug("gcs: about to switch stacks via GCSSS1 to: %lx\n", gcspr);
+	gcsss1((unsigned long *)gcspr);
+	return 0;
+}
+#define arch_shstk_restore gcs_restore
+
+static always_inline int gcs_vma_restore(VmaEntry *vma_entry)
+{
+	unsigned long shstk, i;
+	unsigned long *gcs_data = (void *)vma_premmaped_start(vma_entry);
+	unsigned long vma_size = vma_entry_len(vma_entry);
+	long ret; /* signed, so a negative sys_munmap() result is detectable */
+
+	/* Map a fresh GCS of the VMA's size; gcs_map() reports failure as (unsigned long)-1 and logs the cause. */
+	shstk = gcs_map(0, vma_size, SHADOW_STACK_SET_TOKEN);
+	if (shstk == (unsigned long)-1) {
+		pr_err("Failed to map shadow stack of size %lx\n", vma_size);
+		return -1;
+	}
+	/* restore shadow stack contents from the premapped copy, one 8-byte entry at a time */
+	for (i = 0; i < vma_size / 8; i++)
+		gcsstr(shstk + i * 8, gcs_data[i]);
+
+	pr_debug("unmap %lx %ld\n", (unsigned long)gcs_data, vma_size);
+	ret = sys_munmap(gcs_data, vma_size);
+	if (ret < 0) {
+		pr_err("Failed to unmap premmaped shadow stack\n");
+		return ret;
+	}
+	vma_premmaped_start(vma_entry) = shstk; /* the entry now points at the new GCS mapping */
+	return 0;
+}
+#define shstk_vma_restore gcs_vma_restore
+
+static always_inline int gcs_switch_to_restorer(struct rst_shstk_info *gcs)
+{
+	int ret;
+	unsigned long *ssp;
+	unsigned long addr;
+	unsigned long gcspr;
+	/* Replaces the page at gcs->tmp_gcs with a one-page GCS mapping and calls gcsss1() on its last 8-byte slot. */
+	if (!(gcs && gcs->features_enabled & PR_SHADOW_STACK_ENABLE)) {
+		return 0;
+	}
+
+	pr_debug("gcs->premapped_addr + gcs->vma_size = %lx\n", gcs->premapped_addr + gcs->vma_size);
+	pr_debug("gcs->tmp_gcs = %lx\n", gcs->tmp_gcs);
+	addr = gcs->tmp_gcs;
+
+	if (addr % PAGE_SIZE != 0) {
+		pr_err("gcs: 0x%lx not page-aligned to size 0x%lx\n", addr, PAGE_SIZE);
+		return -1;
+	}
+	/* Drop the existing mapping first so the shadow stack can be mapped at exactly this address. */
+	ret = sys_munmap((void *)addr, PAGE_SIZE);
+	if (ret < 0) {
+		pr_err("gcs: Failed to unmap aarea for dumpee GCS VMAs\n");
+		return -1;
+	}
+
+	gcspr = gcs_map(addr, PAGE_SIZE, SHADOW_STACK_SET_TOKEN);
+
+	if (gcspr == -1) {
+		pr_err("gcs: failed to gcs_map(%lx, %lx)\n", (unsigned long)addr, PAGE_SIZE);
+		return -1;
+	}
+	/* The slot handed to gcsss1() is the last 8 bytes of the freshly mapped page. */
+	ssp = (unsigned long *)(addr + PAGE_SIZE - 8);
+	gcsss1(ssp);
+
+	return 0;
+}
+#define arch_shstk_switch_to_restorer gcs_switch_to_restorer
+
+#endif /* CR_NOGLIBC */
+
+#endif /* __CR_ASM_GCS_H__ */
diff --git a/criu/arch/aarch64/include/asm/restore.h b/criu/arch/aarch64/include/asm/restore.h
index 75e87996a..c79605c40 100644
--- a/criu/arch/aarch64/include/asm/restore.h
+++ b/criu/arch/aarch64/include/asm/restore.h
@@ -26,4 +26,14 @@ static inline void core_get_tls(CoreEntry *pcore, tls_t *ptls)
 
 int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core);
 
+#define ARCH_RST_INFO y
+struct rst_arch_info {
+	bool has_paca, has_pacg;
+	PacAddressKeys pac_address_keys;
+	PacGenericKeys pac_generic_keys;
+};
+
+int arch_ptrace_restore(int pid, struct pstree_item *item);
+void arch_rsti_init(struct pstree_item *current);
+
 #endif
diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h
index 64a9c24eb..8f3edc257 100644
--- a/criu/arch/aarch64/include/asm/restorer.h
+++ b/criu/arch/aarch64/include/asm/restorer.h
@@ -1,10 +1,11 @@
 #ifndef __CR_ASM_RESTORER_H__
 #define __CR_ASM_RESTORER_H__
 
-#include <asm/sigcontext.h>
+#include <signal.h>
 #include <sys/ucontext.h>
 
 #include "asm/types.h"
+#include "asm/gcs.h"
 #include "images/core.pb-c.h"
 
 #include <compel/asm/sigframe.h>
diff --git a/criu/arch/aarch64/include/asm/types.h b/criu/arch/aarch64/include/asm/types.h
index 363c1cae2..db118cafd 100644
--- a/criu/arch/aarch64/include/asm/types.h
+++ b/criu/arch/aarch64/include/asm/types.h
@@ -33,7 +33,16 @@ static inline uint64_t encode_pointer(void *p)
 	return (uint64_t)p;
 }
 
-#define AT_VECTOR_SIZE 40
+/**
+ * See also:
+ *   * arch/arm64/include/uapi/asm/auxvec.h
+ *   * include/linux/auxvec.h
+ *   * include/linux/mm_types.h
+ */
+#define AT_VECTOR_SIZE_BASE 22
+#define AT_VECTOR_SIZE_ARCH 2
+#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))
+
 typedef uint64_t auxv_t;
 typedef uint64_t tls_t;
 
diff --git a/criu/arch/arm/crtools.c b/criu/arch/arm/crtools.c
index 26b94e157..6a5e4c89a 100644
--- a/criu/arch/arm/crtools.c
+++ b/criu/arch/arm/crtools.c
@@ -22,7 +22,7 @@
 
 #define assign_reg(dst, src, e) dst->e = (__typeof__(dst->e))((src)->ARM_##e)
 
-int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
+int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
 {
 	CoreEntry *core = x;
 
diff --git a/criu/arch/arm/include/asm/dump.h b/criu/arch/arm/include/asm/dump.h
index 485986065..b0ac5715d 100644
--- a/criu/arch/arm/include/asm/dump.h
+++ b/criu/arch/arm/include/asm/dump.h
@@ -1,7 +1,7 @@
 #ifndef __CR_ASM_DUMP_H__
 #define __CR_ASM_DUMP_H__
 
-extern int save_task_regs(void *, user_regs_struct_t *, user_fpregs_struct_t *);
+extern int save_task_regs(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *);
 extern int arch_alloc_thread_info(CoreEntry *core);
 extern void arch_free_thread_info(CoreEntry *core);
 
diff --git a/criu/arch/loongarch64/Makefile b/criu/arch/loongarch64/Makefile
new file mode 100644
index 000000000..4bd99eb7e
--- /dev/null
+++ b/criu/arch/loongarch64/Makefile
@@ -0,0 +1,14 @@
+builtin-name		:= crtools.built-in.o
+
+ccflags-y		+= -iquote $(obj)/include
+ccflags-y		+= -iquote criu/include -iquote include
+ccflags-y		+= $(COMPEL_UAPI_INCLUDES)
+
+asflags-y		+= -Wstrict-prototypes
+asflags-y		+= -D__ASSEMBLY__ -nostdlib -fomit-frame-pointer
+asflags-y		+= -iquote $(obj)/include
+ldflags-y		+= -r -z noexecstack
+
+obj-y			+= cpu.o
+obj-y			+= crtools.o
+obj-y			+= sigframe.o
diff --git a/criu/arch/loongarch64/cpu.c b/criu/arch/loongarch64/cpu.c
new file mode 100644
index 000000000..5559c4288
--- /dev/null
+++ b/criu/arch/loongarch64/cpu.c
@@ -0,0 +1,31 @@
+#undef LOG_PREFIX
+#define LOG_PREFIX "cpu: "
+
+int cpu_init(void)
+{
+	return 0;
+}
+
+int cpu_dump_cpuinfo(void)
+{
+	return 0;
+}
+
+int cpu_validate_cpuinfo(void)
+{
+	return 0;
+}
+
+int cpuinfo_dump(void)
+{
+	/*
+	 * Run cpu_init() and then cpu_dump_cpuinfo(); the second call is
+	 * skipped when the first fails. Any non-zero result maps to -1.
+	 */
+	return (cpu_init() || cpu_dump_cpuinfo()) ? -1 : 0;
+}
+
+int cpuinfo_check(void)
+{
+	return 0;
+}
diff --git a/criu/arch/loongarch64/crtools.c b/criu/arch/loongarch64/crtools.c
new file mode 100644
index 000000000..783951b5b
--- /dev/null
+++ b/criu/arch/loongarch64/crtools.c
@@ -0,0 +1,115 @@
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <elf.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/auxv.h>
+#include <sys/wait.h>
+
+#include "types.h"
+#include "log.h"
+#include "asm/restorer.h"
+#include "asm/parasite-syscall.h"
+#include <compel/asm/fpu.h>
+#include "asm/dump.h"
+#include "cr_options.h"
+#include "common/compiler.h"
+#include "restorer.h"
+#include "parasite-syscall.h"
+#include "util.h"
+#include "cpu.h"
+#include <compel/plugins/std/syscall-codes.h>
+#include "kerndat.h"
+
+#include "protobuf.h"
+#include "images/core.pb-c.h"
+#include "images/creds.pb-c.h"
+
+#define assign_reg(dst, src, e) (dst)->e = (__typeof__(dst->e))(src)->e
+
+int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
+{
+	int i;
+	CoreEntry *core = x; /* @x is the CoreEntry to fill; @pid is not used here */
+	UserLoongarch64GpregsEntry *gprs = core->ti_loongarch64->gpregs;
+	UserLoongarch64FpregsEntry *fprs = core->ti_loongarch64->fpregs;
+	for (i = 0; i < GPR_NUM; i++)
+		assign_reg(gprs, regs, regs[i]);
+	assign_reg(gprs, regs, pc);
+	/* FP registers must land in the image entry (fprs); the source is the caller-supplied fpregs. */
+	for (i = 0; i < FPR_NUM; i++)
+		assign_reg(fprs, fpregs, regs[i]);
+	assign_reg(fprs, fpregs, fcc);
+	assign_reg(fprs, fpregs, fcsr);
+	return 0;
+}
+
+int arch_alloc_thread_info(CoreEntry *core)
+{
+	ThreadInfoLoongarch64 *ti_loongarch64;
+	UserLoongarch64GpregsEntry *gpregs;
+	UserLoongarch64FpregsEntry *fpregs;
+	/* Allocate and init the thread info plus its GPR/FPR entries and arrays: 0 on success, -1 on allocation failure. */
+	ti_loongarch64 = xmalloc(sizeof(*ti_loongarch64));
+	if (!ti_loongarch64) /* must not reach the __init() call below as NULL */
+		goto err;
+	thread_info_loongarch64__init(ti_loongarch64);
+	core->ti_loongarch64 = ti_loongarch64;
+	gpregs = xmalloc(sizeof(*gpregs));
+	if (!gpregs)
+		goto err;
+	user_loongarch64_gpregs_entry__init(gpregs);
+	gpregs->n_regs = GPR_NUM;
+	gpregs->regs = xmalloc(GPR_NUM * sizeof(uint64_t));
+	if (!gpregs->regs)
+		goto err;
+	ti_loongarch64->gpregs = gpregs;
+
+	fpregs = xmalloc(sizeof(*fpregs));
+	if (!fpregs)
+		goto err;
+	user_loongarch64_fpregs_entry__init(fpregs);
+	fpregs->n_regs = FPR_NUM;
+	fpregs->regs = xmalloc(FPR_NUM * sizeof(uint64_t));
+	if (!fpregs->regs)
+		goto err;
+	ti_loongarch64->fpregs = fpregs;
+	return 0;
+err: /* NOTE(review): nothing is released here; whatever is already attached to @core is left for the caller */
+	return -1;
+}
+
+void arch_free_thread_info(CoreEntry *core)
+{
+	if (!CORE_THREAD_ARCH_INFO(core)) /* nothing was allocated */
+		return;
+	if (CORE_THREAD_ARCH_INFO(core)->fpregs)
+		xfree(CORE_THREAD_ARCH_INFO(core)->fpregs->regs);
+	if (CORE_THREAD_ARCH_INFO(core)->gpregs) /* may be unset if arch_alloc_thread_info() failed part-way */
+		xfree(CORE_THREAD_ARCH_INFO(core)->gpregs->regs);
+	xfree(CORE_THREAD_ARCH_INFO(core)->fpregs);
+	xfree(CORE_THREAD_ARCH_INFO(core)->gpregs);
+	xfree(CORE_THREAD_ARCH_INFO(core));
+	CORE_THREAD_ARCH_INFO(core) = NULL;
+}
+
+int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core)
+{
+	fpu_context_t *fpu = RT_SIGFRAME_FPU(sigframe);
+	UserLoongarch64FpregsEntry *fpregs = core->ti_loongarch64->fpregs;
+
+	memcpy(fpu->regs, fpregs->regs, sizeof(fpu->regs));
+	fpu->fcc = fpregs->fcc;
+	fpu->fcsr = fpregs->fcsr;
+	return 0;
+}
+
+int restore_gpregs(struct rt_sigframe *sigframe, UserRegsEntry *r)
+{
+	sigcontext_t *sc = RT_SIGFRAME_SIGCTX(sigframe);
+	memcpy(sc->regs, r->regs, sizeof(sc->regs));
+	sc->pc = r->pc;
+	return 0;
+}
diff --git a/criu/arch/loongarch64/include/asm/dump.h b/criu/arch/loongarch64/include/asm/dump.h
new file mode 100644
index 000000000..a1c0c4c58
--- /dev/null
+++ b/criu/arch/loongarch64/include/asm/dump.h
@@ -0,0 +1,15 @@
+#ifndef __CR_ASM_DUMP_H__
+#define __CR_ASM_DUMP_H__
+
+extern int save_task_regs(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *);
+extern int arch_alloc_thread_info(CoreEntry *core);
+extern void arch_free_thread_info(CoreEntry *core);
+
+static inline void core_put_tls(CoreEntry *core, tls_t tls)
+{
+	core->ti_loongarch64->tls = tls;
+}
+
+#define get_task_futex_robust_list_compat(pid, info) -1
+
+#endif
diff --git a/criu/arch/loongarch64/include/asm/int.h b/criu/arch/loongarch64/include/asm/int.h
new file mode 100644
index 000000000..642804e9b
--- /dev/null
+++ b/criu/arch/loongarch64/include/asm/int.h
@@ -0,0 +1,6 @@
+#ifndef __CR_ASM_INT_H__
+#define __CR_ASM_INT_H__
+
+#include "asm-generic/int.h"
+
+#endif /* __CR_ASM_INT_H__ */
diff --git a/criu/arch/loongarch64/include/asm/kerndat.h b/criu/arch/loongarch64/include/asm/kerndat.h
new file mode 100644
index 000000000..bb70cf6cf
--- /dev/null
+++ b/criu/arch/loongarch64/include/asm/kerndat.h
@@ -0,0 +1,7 @@
+#ifndef __CR_ASM_KERNDAT_H__
+#define __CR_ASM_KERNDAT_H__
+
+#define kdat_compatible_cr() 0
+#define kdat_can_map_vdso()  0
+
+#endif /* __CR_ASM_KERNDAT_H__ */
diff --git a/criu/arch/loongarch64/include/asm/parasite-syscall.h b/criu/arch/loongarch64/include/asm/parasite-syscall.h
new file mode 100644
index 000000000..6008c3792
--- /dev/null
+++ b/criu/arch/loongarch64/include/asm/parasite-syscall.h
@@ -0,0 +1,6 @@
+#ifndef __CR_ASM_PARASITE_SYSCALL_H__
+#define __CR_ASM_PARASITE_SYSCALL_H__
+
+struct parasite_ctl;
+
+#endif
diff --git a/criu/arch/loongarch64/include/asm/parasite.h b/criu/arch/loongarch64/include/asm/parasite.h
new file mode 100644
index 000000000..b64cb3185
--- /dev/null
+++ b/criu/arch/loongarch64/include/asm/parasite.h
@@ -0,0 +1,11 @@
+#ifndef __ASM_PARASITE_H__
+#define __ASM_PARASITE_H__
+
+static inline void arch_get_tls(tls_t *ptls)
+{
+	tls_t tls;
+	asm volatile("or %0, $zero, $tp" : "=r"(tls));
+	*ptls = tls;
+}
+
+#endif
diff --git a/criu/arch/loongarch64/include/asm/restore.h b/criu/arch/loongarch64/include/asm/restore.h
new file mode 100644
index 000000000..d956231c8
--- /dev/null
+++ b/criu/arch/loongarch64/include/asm/restore.h
@@ -0,0 +1,33 @@
+#ifndef __CR_ASM_RESTORE_H__
+#define __CR_ASM_RESTORE_H__
+
+#include "asm/restorer.h"
+#include "images/core.pb-c.h"
+
+/* clang-format off */
+#define JUMP_TO_RESTORER_BLOB(new_sp, restore_task_exec_start, task_args)	\
+({										\
+ 	uint64_t save_sp;							\
+	asm volatile("or %0, $zero, $sp" : "=r"(save_sp) : :"memory");		\
+	asm volatile(								\
+	        "or	$a0, $zero, %2	\n"					\
+	        "or	$sp, $zero, %0	\n"					\
+	        "jirl	$ra, %1, 0 	\n"					\
+	        :                               				\
+	        : "r"(new_sp & ~15),						\
+	          "r"(restore_task_exec_start), 				\
+	          "r"(task_args)						\
+	        : "$a0", "memory");						\
+	asm volatile("or $sp, $zero, %0" : : "r"(save_sp) : "memory"); 		\
+})
+
+/* clang-format on */
+
+static inline void core_get_tls(CoreEntry *pcore, tls_t *ptls)
+{
+	*ptls = pcore->ti_loongarch64->tls;
+}
+
+int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core);
+
+#endif
diff --git a/criu/arch/loongarch64/include/asm/restorer.h b/criu/arch/loongarch64/include/asm/restorer.h
new file mode 100644
index 000000000..7a0d35c5b
--- /dev/null
+++ b/criu/arch/loongarch64/include/asm/restorer.h
@@ -0,0 +1,97 @@
+#ifndef __CR_ASM_RESTORER_H__
+#define __CR_ASM_RESTORER_H__
+
+#include "asm/types.h"
+#include <compel/asm/fpu.h>
+#include "images/core.pb-c.h"
+#include <compel/plugins/std/syscall-codes.h>
+#include <compel/asm/sigframe.h>
+
+/* clang-format off */
+#define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid,	\
+			thread_args, clone_restore_fn)			\
+		asm volatile(								\
+				"clone_emul:					\n"	\
+				"ld.d		$a1, %2				\n"	\
+				"addi.d		$a1, $a1, -16 		\n"	\
+				"st.d		%5, $a1, 0			\n"	\
+				"st.d		%6, $a1, 8			\n"	\
+				"or			$a0, $zero, %1		\n"	\
+				"or			$a2, $zero, %3		\n"	\
+				"or			$a3, $zero, %4		\n"	\
+				"ori 		$a7, $zero, "__stringify(__NR_clone)"	\n"	\
+				"syscall	0					\n"	\
+												   	\
+				"beqz		$a0, thread_run     \n"	\
+												   	\
+				"or			%0, $zero, $a0		\n"	\
+				"b			clone_end			\n"	\
+												   	\
+				"thread_run:					\n"	\
+				"ld.d		$a1, $sp, 0			\n"	\
+				"ld.d		$a0, $sp, 8			\n"	\
+				"jirl		$ra, $a1, 0			\n"	\
+												   	\
+				"clone_end:						\n"	\
+				: "=r"(ret)							\
+				: "r"(clone_flags),					\
+				  "ZB"(new_sp),						\
+				  "r"(&parent_tid),					\
+				  "r"(&thread_args[i].pid),			\
+				  "r"(&clone_restore_fn),			\
+				  "r"(&thread_args[i])				\
+				: "$a0", "$a1", "$a2", "$a3", "$a7", "memory")
+
+#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args,	\
+			clone_restore_fn)								\
+		asm volatile(								\
+				"clone3_emul:					\n"	\
+				"or			$a0, $zero, %1		\n"	\
+				"or			$a1, $zero, %2		\n"	\
+				"or			$a2, $zero, %3		\n"	\
+				"or			$a3, $zero, %4		\n"	\
+				"ori		$a7, $zero, "__stringify(__NR_clone3)"	\n"	\
+				"syscall	0					\n"	\
+													\
+				"beqz		$a0, clone3_thread_run	\n"	\
+													\
+				"or			%0, $zero, $a0		\n"	\
+				"b			clone3_end			\n"	\
+													\
+				"clone3_thread_run:				\n"	\
+				"or			$a0, $zero, $a3		\n"	\
+				"jirl		$ra, $a2, 0			\n"	\
+				"clone3_end:					\n"	\
+				: "=r"(ret)							\
+				: "r"(&clone_args),					\
+				  "r"(size),						\
+				  "r"(clone_restore_fn),			\
+				  "r"(args)							\
+				: "$a0", "$a1", "$a2", "$a3", "$a7", "memory")
+/* clang-format on */
+
+static inline void restore_tls(tls_t *ptls)
+{
+	asm volatile("or $tp, $zero, %0" : : "r"(*ptls));
+}
+static inline int arch_compat_rt_sigaction(void *stack, int sig, void *act)
+{
+	return -1;
+}
+static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len)
+{
+	return -1;
+}
+static inline void *alloc_compat_syscall_stack(void)
+{
+	return NULL;
+}
+static inline void free_compat_syscall_stack(void *stack32)
+{
+}
+int restore_gpregs(struct rt_sigframe *f, UserLoongarch64GpregsEntry *r);
+int restore_nonsigframe_gpregs(UserLoongarch64GpregsEntry *r);
+
+#define arch_map_vdso(map, compat) -1
+
+#endif
diff --git a/criu/arch/loongarch64/include/asm/thread_pointer.h b/criu/arch/loongarch64/include/asm/thread_pointer.h
new file mode 100644
index 000000000..f7e07066a
--- /dev/null
+++ b/criu/arch/loongarch64/include/asm/thread_pointer.h
@@ -0,0 +1,27 @@
+/* __thread_pointer definition.  Generic version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _SYS_THREAD_POINTER_H
+#define _SYS_THREAD_POINTER_H
+
+static inline void *__criu_thread_pointer(void)
+{
+	return __builtin_thread_pointer();
+}
+
+#endif /* _SYS_THREAD_POINTER_H */
diff --git a/criu/arch/loongarch64/include/asm/types.h b/criu/arch/loongarch64/include/asm/types.h
new file mode 100644
index 000000000..72bca2022
--- /dev/null
+++ b/criu/arch/loongarch64/include/asm/types.h
@@ -0,0 +1,39 @@
+#ifndef __CR_ASM_TYPES_H__
+#define __CR_ASM_TYPES_H__
+
+#include <stdbool.h>
+#include <signal.h>
+
+#include "page.h"
+#include "bitops.h"
+#include "asm/int.h"
+#include "images/core.pb-c.h"
+
+#include <compel/plugins/std/asm/syscall-types.h>
+
+#define core_is_compat(core) false
+
+#define CORE_ENTRY__MARCH CORE_ENTRY__MARCH__LOONGARCH64
+
+#define CORE_THREAD_ARCH_INFO(core) core->ti_loongarch64
+
+#define TI_SP(core) ((core)->ti_loongarch64->gpregs->regs[4])
+
+#define TI_IP(core) ((core)->ti_loongarch64->gpregs->pc)
+
+typedef UserLoongarch64GpregsEntry UserRegsEntry;
+
+static inline uint64_t encode_pointer(void *p)
+{
+	return (uint64_t)p;
+}
+static inline void *decode_pointer(uint64_t v)
+{
+	return (void *)v;
+}
+
+#define AT_VECTOR_SIZE 44
+typedef uint64_t auxv_t;
+typedef uint64_t tls_t;
+
+#endif /* __CR_ASM_TYPES_H__ */
diff --git a/criu/arch/loongarch64/include/asm/vdso.h b/criu/arch/loongarch64/include/asm/vdso.h
new file mode 100644
index 000000000..64631dee0
--- /dev/null
+++ b/criu/arch/loongarch64/include/asm/vdso.h
@@ -0,0 +1,27 @@
+#ifndef __CR_ASM_VDSO_H__
+#define __CR_ASM_VDSO_H__
+
+#include "asm/int.h"
+#include "asm-generic/vdso.h"
+
+/* This definition is used in pie/util-vdso.c to initialize the vdso symbol
+ * name string table 'vdso_symbols'
+ */
+
+/*
+ * This is a minimal amount of symbols
+ * we should support at the moment.
+ */
+#define VDSO_SYMBOL_MAX	 5
+#define VDSO_SYMBOL_GTOD 3
+
+#define ARCH_VDSO_SYMBOLS_LIST                                       \
+	const char *loongarch_vdso_sym_1 = "__vdso_getcpu";          \
+	const char *loongarch_vdso_sym_2 = "__vdso_clock_getres";    \
+	const char *loongarch_vdso_sym_3 = "__vdso_clock_gettime";   \
+	const char *loongarch_vdso_sym_4 = "__vdso_gettimeofday";    \
+	const char *loongarch_vdso_sym_5 = "__vdso_rt_sigreturn";
+
+#define ARCH_VDSO_SYMBOLS \
+	loongarch_vdso_sym_1, loongarch_vdso_sym_2, loongarch_vdso_sym_3, loongarch_vdso_sym_4, loongarch_vdso_sym_5
+#endif
diff --git a/criu/arch/loongarch64/restorer.c b/criu/arch/loongarch64/restorer.c
new file mode 100644
index 000000000..730318ac1
--- /dev/null
+++ b/criu/arch/loongarch64/restorer.c
@@ -0,0 +1,14 @@
+#include <unistd.h>
+
+#include "restorer.h"
+#include "asm/restorer.h"
+#include <compel/asm/fpu.h>
+
+#include <compel/plugins/std/syscall.h>
+#include "log.h"
+#include "cpu.h"
+
+int restore_nonsigframe_gpregs(UserLoongarch64GpregsEntry *r)
+{
+	return 0; /* stub on loongarch64: @r is not read and success is always reported */
+}
diff --git a/criu/arch/loongarch64/sigframe.c b/criu/arch/loongarch64/sigframe.c
new file mode 100644
index 000000000..18983ff13
--- /dev/null
+++ b/criu/arch/loongarch64/sigframe.c
@@ -0,0 +1,12 @@
+#include <stdlib.h>
+#include <stdint.h>
+
+#include "asm/sigframe.h"
+#include "asm/types.h"
+
+#include "log.h"
+#include <stdio.h>
+int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe)
+{
+	return 0; /* stub on loongarch64: neither frame is touched and success is always reported */
+}
diff --git a/criu/arch/loongarch64/vdso-pie.c b/criu/arch/loongarch64/vdso-pie.c
new file mode 100644
index 000000000..7a75d2741
--- /dev/null
+++ b/criu/arch/loongarch64/vdso-pie.c
@@ -0,0 +1,48 @@
+#include <unistd.h>
+#include "asm/types.h"
+
+#include <compel/plugins/std/string.h>
+#include <compel/plugins/std/syscall.h>
+#include "parasite-vdso.h"
+#include "log.h"
+#include "common/bug.h"
+
+#ifdef LOG_PREFIX
+#undef LOG_PREFIX
+#endif
+#define LOG_PREFIX "vdso: "
+static void insert_trampoline(uintptr_t from, uintptr_t to) /* overwrite the code at @from with a jump to @to */
+{
+	struct {
+		uint32_t pcaddi;
+		uint32_t ldptr;
+		uint32_t jirl;
+		uint32_t guards;
+		uint64_t imm64; /* absolute target; sits 16 bytes after the first word because the struct is packed */
+	} __packed jmp = {
+		.pcaddi = 0x18000095, /*  pcaddi  $x, 4        */
+		.ldptr = 0x260002b5,  /*  ldptr.d $x, $x, 0    */
+		.jirl = 0x4c0002a0,   /*  jirl    $zero, $x, 0 */
+		.guards = 0x002a0000, /*  break   0            */
+		.imm64 = to,
+	};
+	memcpy((void *)from, &jmp, sizeof(jmp)); /* four instruction words plus the 64-bit target */
+}
+
+int vdso_redirect_calls(unsigned long base_to, unsigned long base_from, struct vdso_symtable *sto,
+			struct vdso_symtable *sfrom, bool compat_vdso)
+{
+	unsigned int idx;
+
+	/* Every symbol present in the source table gets a trampoline to its counterpart in @sto. */
+	for (idx = 0; idx < ARRAY_SIZE(sto->symbols); idx++) {
+		if (!vdso_symbol_empty(&sfrom->symbols[idx])) {
+			pr_debug("br: %lx/%lx -> %lx/%lx (index %d)\n", base_from, sfrom->symbols[idx].offset,
+				 base_to, sto->symbols[idx].offset, idx);
+			insert_trampoline(base_from + sfrom->symbols[idx].offset,
+					  base_to + sto->symbols[idx].offset);
+		}
+	}
+	return 0;
+}
diff --git a/criu/arch/mips/crtools.c b/criu/arch/mips/crtools.c
index ed4da9b7e..eabbd85f4 100644
--- a/criu/arch/mips/crtools.c
+++ b/criu/arch/mips/crtools.c
@@ -27,7 +27,7 @@
 #include "images/core.pb-c.h"
 #include "images/creds.pb-c.h"
 
-int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
+int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
 {
 	CoreEntry *core = x;
 
diff --git a/criu/arch/mips/include/asm/dump.h b/criu/arch/mips/include/asm/dump.h
index 58015833d..ec59b051b 100644
--- a/criu/arch/mips/include/asm/dump.h
+++ b/criu/arch/mips/include/asm/dump.h
@@ -1,7 +1,7 @@
 #ifndef __CR_ASM_DUMP_H__
 #define __CR_ASM_DUMP_H__
 
-extern int save_task_regs(void *, user_regs_struct_t *, user_fpregs_struct_t *);
+extern int save_task_regs(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *);
 extern int arch_alloc_thread_info(CoreEntry *core);
 extern void arch_free_thread_info(CoreEntry *core);
 extern int get_task_futex_robust_list_compat(pid_t pid, ThreadCoreEntry *info);
diff --git a/criu/arch/ppc64/cpu.c b/criu/arch/ppc64/cpu.c
index bb5b7256e..b87230f40 100644
--- a/criu/arch/ppc64/cpu.c
+++ b/criu/arch/ppc64/cpu.c
@@ -64,6 +64,12 @@ int cpu_validate_cpuinfo(void)
 	if (!img)
 		return -1;
 
+	if (empty_image(img)) {
+		pr_err("No cpuinfo image\n");
+		close_image(img);
+		return -1;
+	}
+
 	if (pb_read_one(img, &cpu_info, PB_CPUINFO) < 0)
 		goto error;
 
diff --git a/criu/arch/ppc64/crtools.c b/criu/arch/ppc64/crtools.c
index a08a2ca5b..d57040008 100644
--- a/criu/arch/ppc64/crtools.c
+++ b/criu/arch/ppc64/crtools.c
@@ -404,7 +404,7 @@ static int __copy_task_regs(user_regs_struct_t *regs, user_fpregs_struct_t *fpre
 	return 0;
 }
 
-int save_task_regs(void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f)
+int save_task_regs(pid_t pid, void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f)
 {
 	return __copy_task_regs(u, f, (CoreEntry *)arg);
 }
diff --git a/criu/arch/ppc64/include/asm/dump.h b/criu/arch/ppc64/include/asm/dump.h
index eb488900a..7393654fa 100644
--- a/criu/arch/ppc64/include/asm/dump.h
+++ b/criu/arch/ppc64/include/asm/dump.h
@@ -1,7 +1,7 @@
 #ifndef __CR_ASM_DUMP_H__
 #define __CR_ASM_DUMP_H__
 
-extern int save_task_regs(void *, user_regs_struct_t *, user_fpregs_struct_t *);
+extern int save_task_regs(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *);
 extern int arch_alloc_thread_info(CoreEntry *core);
 extern void arch_free_thread_info(CoreEntry *core);
 
diff --git a/criu/arch/riscv64/Makefile b/criu/arch/riscv64/Makefile
new file mode 100644
index 000000000..d19895471
--- /dev/null
+++ b/criu/arch/riscv64/Makefile
@@ -0,0 +1,8 @@
+builtin-name		:= crtools.built-in.o
+
+ldflags-y		+= -r
+
+obj-y			+= cpu.o
+obj-y			+= crtools.o
+obj-y			+= sigframe.o
+obj-y			+= vdso-lookup.o
\ No newline at end of file
diff --git a/criu/arch/riscv64/cpu.c b/criu/arch/riscv64/cpu.c
new file mode 100644
index 000000000..97a883b8c
--- /dev/null
+++ b/criu/arch/riscv64/cpu.c
@@ -0,0 +1,40 @@
+#undef LOG_PREFIX
+#define LOG_PREFIX "cpu: "
+
+#include <errno.h>
+#include "cpu.h"
+
+int cpu_init(void)
+{
+	return 0; /* nothing is set up for riscv64: always succeeds */
+}
+
+int cpu_dump_cpuinfo(void)
+{
+	return 0; /* no cpuinfo is written on riscv64; success is reported regardless */
+}
+
+int cpu_validate_cpuinfo(void)
+{
+	return 0; /* no validation is performed on riscv64; success is reported regardless */
+}
+
+int cpu_dump_cpuinfo_single(void)
+{
+	return -ENOTSUP; /* not implemented for riscv64 */
+}
+
+int cpu_validate_image_cpuinfo_single(void)
+{
+	return -ENOTSUP; /* not implemented for riscv64 */
+}
+
+int cpuinfo_dump(void)
+{
+	return -ENOTSUP; /* not implemented for riscv64 */
+}
+
+int cpuinfo_check(void)
+{
+	return -ENOTSUP; /* not implemented for riscv64 */
+}
diff --git a/criu/arch/riscv64/crtools.c b/criu/arch/riscv64/crtools.c
new file mode 100644
index 000000000..eea98d6de
--- /dev/null
+++ b/criu/arch/riscv64/crtools.c
@@ -0,0 +1,171 @@
+#include <string.h>
+#include <unistd.h>
+
+#include <linux/elf.h>
+
+#include "types.h"
+#include <compel/asm/processor-flags.h>
+
+#include <compel/asm/infect-types.h>
+#include "asm/restorer.h"
+#include "common/compiler.h"
+#include <compel/ptrace.h>
+#include "asm/dump.h"
+#include "protobuf.h"
+#include "images/core.pb-c.h"
+#include "images/creds.pb-c.h"
+#include "parasite-syscall.h"
+#include "log.h"
+#include "util.h"
+#include "cpu.h"
+#include "restorer.h"
+#include "compel/infect.h"
+
+#define assign_reg(dst, src, e) dst->e = (__typeof__(dst->e))(src)->e
+
+int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpsimd)
+{
+	ThreadInfoRiscv64 *ti = ((CoreEntry *)x)->ti_riscv64;
+	int n;
+
+	/* Integer state: every field is copied by name into the image entry. */
+	assign_reg(ti->gpregs, regs, pc);
+	assign_reg(ti->gpregs, regs, ra);
+	assign_reg(ti->gpregs, regs, sp);
+	assign_reg(ti->gpregs, regs, gp);
+	assign_reg(ti->gpregs, regs, tp);
+	assign_reg(ti->gpregs, regs, t0);
+	assign_reg(ti->gpregs, regs, t1);
+	assign_reg(ti->gpregs, regs, t2);
+	assign_reg(ti->gpregs, regs, s0);
+	assign_reg(ti->gpregs, regs, s1);
+	assign_reg(ti->gpregs, regs, a0);
+	assign_reg(ti->gpregs, regs, a1);
+	assign_reg(ti->gpregs, regs, a2);
+	assign_reg(ti->gpregs, regs, a3);
+	assign_reg(ti->gpregs, regs, a4);
+	assign_reg(ti->gpregs, regs, a5);
+	assign_reg(ti->gpregs, regs, a6);
+	assign_reg(ti->gpregs, regs, a7);
+	assign_reg(ti->gpregs, regs, s2);
+	assign_reg(ti->gpregs, regs, s3);
+	assign_reg(ti->gpregs, regs, s4);
+	assign_reg(ti->gpregs, regs, s5);
+	assign_reg(ti->gpregs, regs, s6);
+	assign_reg(ti->gpregs, regs, s7);
+	assign_reg(ti->gpregs, regs, s8);
+	assign_reg(ti->gpregs, regs, s9);
+	assign_reg(ti->gpregs, regs, s10);
+	assign_reg(ti->gpregs, regs, s11);
+	assign_reg(ti->gpregs, regs, t3);
+	assign_reg(ti->gpregs, regs, t4);
+	assign_reg(ti->gpregs, regs, t5);
+	assign_reg(ti->gpregs, regs, t6);
+
+	/* Floating-point state: the 32 f registers, then fcsr. */
+	for (n = 0; n != 32; n++)
+		assign_reg(ti->fpsimd, fpsimd, f[n]);
+	assign_reg(ti->fpsimd, fpsimd, fcsr);
+
+	return 0;
+}
+
+int arch_alloc_thread_info(CoreEntry *core)
+{
+	ThreadInfoRiscv64 *ti_riscv64;
+	UserRiscv64RegsEntry *gpregs;
+	UserRiscv64DExtEntry *fpsimd;
+
+	/* Each object is linked into @core as soon as it is initialised; on failure */
+	/* -1 is returned and whatever was already attached stays attached to @core. */
+	ti_riscv64 = xmalloc(sizeof(*ti_riscv64));
+	if (!ti_riscv64)
+		return -1;
+	thread_info_riscv64__init(ti_riscv64);
+	core->ti_riscv64 = ti_riscv64;
+
+	gpregs = xmalloc(sizeof(*gpregs));
+	if (!gpregs)
+		return -1;
+	user_riscv64_regs_entry__init(gpregs);
+	ti_riscv64->gpregs = gpregs;
+
+	fpsimd = xmalloc(sizeof(*fpsimd));
+	if (!fpsimd)
+		return -1;
+	user_riscv64_d_ext_entry__init(fpsimd);
+	ti_riscv64->fpsimd = fpsimd;
+	fpsimd->f = xmalloc(32 * sizeof(fpsimd->f[0]));
+	if (!fpsimd->f)
+		return -1;
+	/* Publish the element count only once the array really exists. */
+	fpsimd->n_f = 32;
+
+	return 0;
+}
+
+void arch_free_thread_info(CoreEntry *core) /* release the riscv64 thread info attached to @core, if any */
+{
+	if (core->ti_riscv64) {
+		if (core->ti_riscv64->fpsimd) {
+			xfree(core->ti_riscv64->fpsimd->f); /* register array first, then the entry owning it */
+			xfree(core->ti_riscv64->fpsimd);
+		}
+		xfree(core->ti_riscv64->gpregs);
+		xfree(core->ti_riscv64);
+		core->ti_riscv64 = NULL; /* a repeated call then finds nothing to free */
+	}
+}
+
+int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) /* copy saved FP state from @core into @sigframe */
+{
+	int i;
+	UserRiscv64DExtEntry *fpsimd = core->ti_riscv64->fpsimd;
+
+	if (fpsimd->n_f != 32)
+		return 1; /* the image must carry exactly 32 registers; nothing is copied otherwise */
+
+	for (i = 0; i < 32; ++i)
+		sigframe->uc.uc_mcontext.__fpregs.__d.__f[i] = fpsimd->f[i];
+	sigframe->uc.uc_mcontext.__fpregs.__d.__fcsr = fpsimd->fcsr; /* control/status word goes last */
+
+	return 0;
+}
+
+int restore_gpregs(struct rt_sigframe *f, UserRiscv64RegsEntry *r) /* fill the sigframe's __gregs[] from @r */
+{
+	f->uc.uc_mcontext.__gregs[0] = r->pc; /* slot 0 carries the program counter */
+	f->uc.uc_mcontext.__gregs[1] = r->ra; /* slots 1..31 follow the x1..x31 register numbering */
+	f->uc.uc_mcontext.__gregs[2] = r->sp;
+	f->uc.uc_mcontext.__gregs[3] = r->gp;
+	f->uc.uc_mcontext.__gregs[4] = r->tp;
+	f->uc.uc_mcontext.__gregs[5] = r->t0;
+	f->uc.uc_mcontext.__gregs[6] = r->t1;
+	f->uc.uc_mcontext.__gregs[7] = r->t2;
+	f->uc.uc_mcontext.__gregs[8] = r->s0;
+	f->uc.uc_mcontext.__gregs[9] = r->s1;
+	f->uc.uc_mcontext.__gregs[10] = r->a0;
+	f->uc.uc_mcontext.__gregs[11] = r->a1;
+	f->uc.uc_mcontext.__gregs[12] = r->a2;
+	f->uc.uc_mcontext.__gregs[13] = r->a3;
+	f->uc.uc_mcontext.__gregs[14] = r->a4;
+	f->uc.uc_mcontext.__gregs[15] = r->a5;
+	f->uc.uc_mcontext.__gregs[16] = r->a6;
+	f->uc.uc_mcontext.__gregs[17] = r->a7;
+	f->uc.uc_mcontext.__gregs[18] = r->s2;
+	f->uc.uc_mcontext.__gregs[19] = r->s3;
+	f->uc.uc_mcontext.__gregs[20] = r->s4;
+	f->uc.uc_mcontext.__gregs[21] = r->s5;
+	f->uc.uc_mcontext.__gregs[22] = r->s6;
+	f->uc.uc_mcontext.__gregs[23] = r->s7;
+	f->uc.uc_mcontext.__gregs[24] = r->s8;
+	f->uc.uc_mcontext.__gregs[25] = r->s9;
+	f->uc.uc_mcontext.__gregs[26] = r->s10;
+	f->uc.uc_mcontext.__gregs[27] = r->s11;
+	f->uc.uc_mcontext.__gregs[28] = r->t3;
+	f->uc.uc_mcontext.__gregs[29] = r->t4;
+	f->uc.uc_mcontext.__gregs[30] = r->t5;
+	f->uc.uc_mcontext.__gregs[31] = r->t6;
+
+	return 0; /* there is no failure path */
+}
diff --git a/criu/arch/riscv64/include/asm/dump.h b/criu/arch/riscv64/include/asm/dump.h
new file mode 100644
index 000000000..4f0a2d209
--- /dev/null
+++ b/criu/arch/riscv64/include/asm/dump.h
@@ -0,0 +1,15 @@
+#ifndef __CR_ASM_DUMP_H__
+#define __CR_ASM_DUMP_H__
+
+extern int save_task_regs(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *);
+extern int arch_alloc_thread_info(CoreEntry *core);
+extern void arch_free_thread_info(CoreEntry *core);
+
+static inline void core_put_tls(CoreEntry *core, tls_t tls)
+{
+	core->ti_riscv64->tls = tls; /* record the TLS value in the riscv64 thread info of @core */
+}
+
+#define get_task_futex_robust_list_compat(pid, info) -1
+
+#endif
diff --git a/criu/arch/riscv64/include/asm/int.h b/criu/arch/riscv64/include/asm/int.h
new file mode 100644
index 000000000..642804e9b
--- /dev/null
+++ b/criu/arch/riscv64/include/asm/int.h
@@ -0,0 +1,6 @@
+#ifndef __CR_ASM_INT_H__
+#define __CR_ASM_INT_H__
+
+#include "asm-generic/int.h"
+
+#endif /* __CR_ASM_INT_H__ */
diff --git a/criu/arch/riscv64/include/asm/kerndat.h b/criu/arch/riscv64/include/asm/kerndat.h
new file mode 100644
index 000000000..bb70cf6cf
--- /dev/null
+++ b/criu/arch/riscv64/include/asm/kerndat.h
@@ -0,0 +1,7 @@
+#ifndef __CR_ASM_KERNDAT_H__
+#define __CR_ASM_KERNDAT_H__
+
+#define kdat_compatible_cr() 0
+#define kdat_can_map_vdso()  0
+
+#endif /* __CR_ASM_KERNDAT_H__ */
diff --git a/criu/arch/riscv64/include/asm/parasite-syscall.h b/criu/arch/riscv64/include/asm/parasite-syscall.h
new file mode 100644
index 000000000..6008c3792
--- /dev/null
+++ b/criu/arch/riscv64/include/asm/parasite-syscall.h
@@ -0,0 +1,6 @@
+#ifndef __CR_ASM_PARASITE_SYSCALL_H__
+#define __CR_ASM_PARASITE_SYSCALL_H__
+
+struct parasite_ctl;
+
+#endif
diff --git a/criu/arch/riscv64/include/asm/parasite.h b/criu/arch/riscv64/include/asm/parasite.h
new file mode 100644
index 000000000..4798cfd8a
--- /dev/null
+++ b/criu/arch/riscv64/include/asm/parasite.h
@@ -0,0 +1,16 @@
+#ifndef __ASM_PARASITE_H__
+#define __ASM_PARASITE_H__
+
+/*
+ * Capture the thread pointer of the calling thread.
+ *
+ * On RISC-V the tp register is what thread-local storage (TLS) is
+ * typically addressed through; its current value is copied into the
+ * tls_t object that ptls points to.
+ */
+static inline void arch_get_tls(tls_t *ptls)
+{
+	asm("mv %0, tp" : "=r"(*ptls));
+}
+
+#endif
diff --git a/criu/arch/riscv64/include/asm/restore.h b/criu/arch/riscv64/include/asm/restore.h
new file mode 100644
index 000000000..e4f25a57b
--- /dev/null
+++ b/criu/arch/riscv64/include/asm/restore.h
@@ -0,0 +1,29 @@
+#ifndef __CR_ASM_RESTORE_H__
+#define __CR_ASM_RESTORE_H__
+
+#include "asm/restorer.h"
+
+#include "images/core.pb-c.h"
+
+/* clang-format off */
+#define JUMP_TO_RESTORER_BLOB(new_sp, restore_task_exec_start,	\
+			      task_args)			\
+	asm volatile(						\
+			"and  sp, %0, ~15		\n"	\
+			"mv  a0, %2			\n"	\
+			"jr   %1 			\n"	\
+			:					\
+			: "r"(new_sp),				\
+			  "r"(restore_task_exec_start),		\
+			  "r"(task_args)			\
+			: "a0", "memory")
+/* clang-format on */
+
+static inline void core_get_tls(CoreEntry *pcore, tls_t *ptls)
+{
+	*ptls = pcore->ti_riscv64->tls; /* hand back the TLS value kept in the riscv64 thread info */
+}
+
+int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core);
+
+#endif
diff --git a/criu/arch/riscv64/include/asm/restorer.h b/criu/arch/riscv64/include/asm/restorer.h
new file mode 100644
index 000000000..45fe847a9
--- /dev/null
+++ b/criu/arch/riscv64/include/asm/restorer.h
@@ -0,0 +1,150 @@
+#ifndef __CR_ASM_RESTORER_H__
+#define __CR_ASM_RESTORER_H__
+
+#include <sys/ucontext.h>
+
+#include "asm/types.h"
+#include "images/core.pb-c.h"
+
+#include <compel/asm/sigframe.h>
+
+// kernel arg order for clone
+// unsigned long clone_flags,
+// unsigned long newsp,
+// int __user * parent_tidptr,
+// unsigned long tls,
+// int __user * child_tidptr
+/* clang-format off */
+#define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid,		\
+			     thread_args, clone_restore_fn)			\
+	asm volatile(								\
+			"clone_emul:					\n"	\
+			"ld a1, %2					\n"	\
+			"andi a1, a1, ~15				\n"	\
+			"addi a1, a1, -16				\n"	\
+			"sd %5, 0(a1)					\n"	\
+			"sd %6, 8(a1)					\n"	\
+			"mv a0, %1					\n"	\
+			"mv a2, %3					\n"	\
+			"mv a3, %4					\n"	\
+			"li a7, "__stringify(__NR_clone)"		\n"	\
+			"ecall						\n"	\
+										\
+			"beqz a0, thread_run				\n"	\
+										\
+			"mv %0, a0					\n"	\
+			"j   clone_end					\n"	\
+										\
+			"thread_run:					\n"	\
+			"ld a1, 0(sp)					\n"	\
+			"ld a0, 8(sp)					\n"	\
+			"jr  a1						\n"	\
+										\
+			"clone_end:					\n"	\
+			: "=r"(ret)						\
+			: "r"(clone_flags),					\
+			  "m"(new_sp),						\
+			  "r"(&parent_tid),					\
+			  "r"(&thread_args[i].pid),				\
+			  "r"(clone_restore_fn),				\
+			  "r"(&thread_args[i])					\
+			: "a0", "a1", "a2", "a3", "a7", "memory")
+
+/*
+ * Based on sysdeps/unix/sysv/linux/riscv/clone.S
+ *
+ * int clone(int (*fn)(void *arg),            a0
+ *	     void *child_stack,               a1
+ *	     int flags,                       a2
+ *	     void *arg,                       a3
+ *	     pid_t *ptid,                     a4
+ *	     struct user_desc *tls,           a5
+ *	     pid_t *ctid);                    a6
+ *
+ * int clone3(struct clone_args *args,        a0
+ *	      size_t size);                   a1
+ *
+ * Always consult the CLONE3 wrappers for other architectures
+ * for additional details.
+ *
+ */
+#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args,			\
+			      clone_restore_fn)					\
+	asm volatile(								\
+	/* In contrast to the clone() wrapper above this does not put
+	 * the thread function and its arguments on the child stack,
+	 * but uses registers to pass these parameters to the child process.
+	 * Based on the glibc clone() wrapper at
+	 * sysdeps/unix/sysv/linux/riscv/clone.S.
+	 */									\
+			"clone3_emul:					\n"	\
+	/*
+	 * The thread function and its argument are parked in temporaries
+	 * before the syscall and picked up again in the child, so this relies
+	 * on t0 (thread function) and t1 (thread arguments) surviving clone3.
+	 */									\
+			"mv t0, %3	/* clone_restore_fn */		\n"	\
+			"mv t1, %4	/* args */			\n"	\
+			"mv a0, %1	/* &clone_args */		\n"	\
+			"mv a1, %2	/* size */			\n"	\
+	/* Load syscall number */						\
+			"li a7, "__stringify(__NR_clone3)"		\n"	\
+	/* Do the syscall */							\
+			"ecall						\n"	\
+										\
+			"beqz a0, clone3_thread_run			\n"	\
+										\
+			"mv %0, a0					\n"	\
+			"j   clone3_end					\n"	\
+										\
+			"clone3_thread_run:				\n"	\
+	/* Move args to a0 */							\
+			"mv a0, t1					\n"	\
+	/* Jump to clone_restore_fn */						\
+			"jr  t0						\n"	\
+										\
+			"clone3_end:					\n"	\
+			: "=r"(ret)						\
+			: "r"(&clone_args),					\
+			  "r"(size),						\
+			  "r"(clone_restore_fn),				\
+			  "r"(args)						\
+			: "a0", "a1", "a7", "t0", "t1", "memory")
+
+#define ARCH_FAIL_CORE_RESTORE					\
+	asm volatile(						\
+			"mv sp, %0			\n"	\
+			"li a0, 0			\n"	\
+			"jr   x0			\n"	\
+			:					\
+			: "r"(ret)				\
+			: "sp", "a0", "memory")
+/* clang-format on */
+
+#define arch_map_vdso(map, compat) -1
+
+int restore_gpregs(struct rt_sigframe *f, UserRiscv64RegsEntry *r);
+int restore_nonsigframe_gpregs(UserRiscv64RegsEntry *r);
+
+static inline void restore_tls(tls_t *ptls)
+{
+	asm("mv tp, %0" : : "r"(*ptls)); /* load the saved value into the thread pointer register */
+}
+
+static inline void *alloc_compat_syscall_stack(void)
+{
+	return NULL;
+}
+static inline void free_compat_syscall_stack(void *stack32)
+{
+}
+static inline int arch_compat_rt_sigaction(void *stack, int sig, void *act)
+{
+	return -1;
+}
+static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len)
+{
+	return -1;
+}
+
+#endif
\ No newline at end of file
diff --git a/criu/arch/riscv64/include/asm/thread_pointer.h b/criu/arch/riscv64/include/asm/thread_pointer.h
new file mode 100644
index 000000000..f7e07066a
--- /dev/null
+++ b/criu/arch/riscv64/include/asm/thread_pointer.h
@@ -0,0 +1,27 @@
+/* __thread_pointer definition.  Generic version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _SYS_THREAD_POINTER_H
+#define _SYS_THREAD_POINTER_H
+
+static inline void *__criu_thread_pointer(void)
+{
+	return __builtin_thread_pointer(); /* compiler builtin; no hand-written assembly is used here */
+}
+
+#endif /* _SYS_THREAD_POINTER_H */
diff --git a/criu/arch/riscv64/include/asm/types.h b/criu/arch/riscv64/include/asm/types.h
new file mode 100644
index 000000000..83bb5f65f
--- /dev/null
+++ b/criu/arch/riscv64/include/asm/types.h
@@ -0,0 +1,40 @@
+#ifndef __CR_ASM_TYPES_H__
+#define __CR_ASM_TYPES_H__
+
+#include <stdbool.h>
+#include <signal.h>
+#include <asm/ptrace.h>
+#include "images/core.pb-c.h"
+
+#include "page.h"
+#include "bitops.h"
+#include "asm/int.h"
+
+#include <compel/plugins/std/asm/syscall-types.h>
+
+#define core_is_compat(core) false
+
+typedef UserRiscv64RegsEntry UserRegsEntry;
+
+#define CORE_ENTRY__MARCH CORE_ENTRY__MARCH__RISCV64
+
+#define CORE_THREAD_ARCH_INFO(core) core->ti_riscv64
+
+#define TI_SP(core) ((core)->ti_riscv64->gpregs->sp)
+
+#define TI_IP(core) ((core)->ti_riscv64->gpregs->pc)
+
+static inline void *decode_pointer(uint64_t v)
+{
+	return (void *)v;
+}
+static inline uint64_t encode_pointer(void *p)
+{
+	return (uint64_t)p;
+}
+
+#define AT_VECTOR_SIZE 64
+typedef uint64_t auxv_t;
+typedef uint64_t tls_t;
+
+#endif /* __CR_ASM_TYPES_H__ */
diff --git a/criu/arch/riscv64/include/asm/vdso.h b/criu/arch/riscv64/include/asm/vdso.h
new file mode 100644
index 000000000..322149c6e
--- /dev/null
+++ b/criu/arch/riscv64/include/asm/vdso.h
@@ -0,0 +1,28 @@
+#ifndef __CR_ASM_VDSO_H__
+#define __CR_ASM_VDSO_H__
+
+#include "asm/int.h"
+#include "common/compiler.h"
+#include "asm-generic/vdso.h"
+
+/*
+ * This is a minimal amount of symbols
+ * we should support at the moment.
+ */
+#define VDSO_SYMBOL_MAX	 6
+#define VDSO_SYMBOL_GTOD 2
+
+#define ARCH_VDSO_SYMBOLS_LIST                                  \
+	const char *rv64_vdso_symbol1 = "__vdso_clock_getres";  \
+	const char *rv64_vdso_symbol2 = "__vdso_clock_gettime"; \
+	const char *rv64_vdso_symbol3 = "__vdso_gettimeofday";  \
+	const char *rv64_vdso_symbol4 = "__vdso_getcpu";        \
+	const char *rv64_vdso_symbol5 = "__vdso_flush_icache";  \
+	const char *rv64_vdso_symbol6 = "__vdso_rt_sigreturn";
+
+#define ARCH_VDSO_SYMBOLS \
+	rv64_vdso_symbol1, rv64_vdso_symbol2, rv64_vdso_symbol3, rv64_vdso_symbol4, rv64_vdso_symbol5, rv64_vdso_symbol6
+
+extern void write_intraprocedure_branch(unsigned long to, unsigned long from);
+
+#endif /* __CR_ASM_VDSO_H__ */
\ No newline at end of file
diff --git a/criu/arch/riscv64/restorer.c b/criu/arch/riscv64/restorer.c
new file mode 100644
index 000000000..d605f048d
--- /dev/null
+++ b/criu/arch/riscv64/restorer.c
@@ -0,0 +1,14 @@
+#include <unistd.h>
+
+#include "restorer.h"
+#include "asm/restorer.h"
+
+#include <compel/plugins/std/syscall.h>
+#include "log.h"
+#include <compel/asm/fpu.h>
+#include "cpu.h"
+
+int restore_nonsigframe_gpregs(UserRiscv64RegsEntry *r)
+{
+	return 0; /* stub on riscv64: @r is not read and success is always reported */
+}
diff --git a/criu/arch/riscv64/sigframe.c b/criu/arch/riscv64/sigframe.c
new file mode 100644
index 000000000..8096fab66
--- /dev/null
+++ b/criu/arch/riscv64/sigframe.c
@@ -0,0 +1,8 @@
+#include "asm/types.h"
+#include <compel/asm/infect-types.h>
+#include "asm/sigframe.h"
+
+int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe)
+{
+	return 0; /* stub on riscv64: neither frame is touched and success is always reported */
+}
diff --git a/criu/arch/riscv64/vdso-lookup.S b/criu/arch/riscv64/vdso-lookup.S
new file mode 100644
index 000000000..50d4ecf08
--- /dev/null
+++ b/criu/arch/riscv64/vdso-lookup.S
@@ -0,0 +1,15 @@
+#include "common/asm/linkage.h"
+
+.section	.text
+
+/* Expects t0 to hold the index into the lookup table; overwrites t0, t1 and t2. */
+GLOBAL(riscv_vdso_lookup)
+	/* Get the beginning of the lookup table (the label marking the end of this stub) */
+	la t1, riscv_vdso_lookup_end
+	/* Scale the index by 8, the size of the entry loaded below */
+	slli t0, t0, 3
+	add t1, t0, t1
+	ld t2, 0(t1)	/* t2 = table[index] */
+	jr t2		/* plain jump: ra is not written */
+
+GLOBAL(riscv_vdso_lookup_end)
\ No newline at end of file
diff --git a/criu/arch/riscv64/vdso-pie.c b/criu/arch/riscv64/vdso-pie.c
new file mode 100644
index 000000000..aa9272fb5
--- /dev/null
+++ b/criu/arch/riscv64/vdso-pie.c
@@ -0,0 +1,159 @@
+#include <unistd.h>
+
+#include "asm/types.h"
+
+#include <compel/asm/instruction_formats.h>
+#include <compel/plugins/std/string.h>
+#include <compel/plugins/std/syscall.h>
+#include <compel/plugins/std/syscall-codes.h>
+#include "atomic.h"
+#include "parasite-vdso.h"
+#include "log.h"
+#include "common/bug.h"
+
+#ifdef LOG_PREFIX
+#undef LOG_PREFIX
+#endif
+#define LOG_PREFIX "vdso: "
+
+/* These symbols are defined in vdso-lookup.S */
+extern char *riscv_vdso_lookup, *riscv_vdso_lookup_end;
+
+/*
+ *  li t0, INDEX
+ *  jal x0, riscv_vdso_lookup
+ */
+#define TRAMP_CALL_SIZE (2 * sizeof(uint32_t))
+
+static inline void invalidate_caches(void)
+{
+	// We're supposed to use the VDSO as the officially sanctioned ABI. But oh well.
+	// Raw riscv_flush_icache(start = 0, end = 0, flags = 1); the result is deliberately ignored.
+	__smp_mb();
+	asm volatile("li a0, 0\n"
+		     "li a1, 0\n"
+		     "li a2, 1\n"   /* flags: bit 0, the value of SYS_RISCV_FLUSH_ICACHE_LOCAL and _ALL */
+		     "li a7, 259\n" /* __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) */
+		     "ecall\n"
+		     :
+		     :
+		     : "a0", "a1", "a2", "a7", "memory"); /* every register written above, plus memory */
+}
+
+static inline size_t vdso_trampoline_size(void)
+{
+	return (size_t)&riscv_vdso_lookup_end - (size_t)&riscv_vdso_lookup; /* bytes between the two labels */
+}
+
+static uint64_t put_trampoline(uint64_t at, struct vdso_symtable *sym)
+{
+	int i, j;
+	uint64_t total_size, trampoline_size;
+	uint64_t trampoline = 0;
+
+	/* First of all we have to find a place where to put the trampoline
+	 * code. Returns the address it was copied to, or 0 if nothing fits.
+	 */
+	trampoline_size = vdso_trampoline_size();
+	total_size = trampoline_size + VDSO_SYMBOL_MAX * sizeof(uint64_t); /* lookup code plus one slot per symbol */
+
+	for (i = 0; i < ARRAY_SIZE(sym->symbols); i++) {
+		if (vdso_symbol_empty(&sym->symbols[i]))
+			continue;
+
+		pr_debug("Checking '%s' at %lx\n", sym->symbols[i].name, sym->symbols[i].offset);
+
+		/* find the nearest following symbol we are interested in */
+		for (j = 0; j < ARRAY_SIZE(sym->symbols); j++) {
+			if (i == j || vdso_symbol_empty(&sym->symbols[j]))
+				continue;
+
+			if (sym->symbols[j].offset <= sym->symbols[i].offset)
+				/* this symbol does not come after the current one */
+				continue;
+
+			if ((sym->symbols[i].offset + TRAMP_CALL_SIZE) > sym->symbols[j].offset) {
+				/* we have a major issue here since we cannot
+				 * even put the trampoline call for this symbol
+				 */
+				pr_err("Can't handle small vDSO symbol %s\n", sym->symbols[i].name);
+				return 0;
+			}
+
+			if (trampoline)
+				/* no need to put it twice; NOTE(review): looks unreachable, placement below returns at once */
+				continue;
+
+			if ((sym->symbols[j].offset - (sym->symbols[i].offset + TRAMP_CALL_SIZE)) <= total_size)
+				/* not enough place */
+				continue;
+
+			/* We can put the trampoline there, right behind the two-instruction call stub */
+			trampoline = at + sym->symbols[i].offset;
+			trampoline += TRAMP_CALL_SIZE;
+
+			pr_debug("Putting vDSO trampoline in %s at %lx\n", sym->symbols[i].name, trampoline);
+			memcpy((void *)trampoline, &riscv_vdso_lookup, trampoline_size);
+			invalidate_caches();
+			return trampoline;
+		}
+	}
+
+	return 0;
+}
+
+static inline void put_trampoline_call(uint64_t from, uint64_t to, uint64_t trampoline, unsigned int idx)
+{
+	size_t trampoline_size = vdso_trampoline_size();
+	uint64_t *lookup_table = NULL;
+	/*
+	 *  li t0, INDEX   == addi t0, x0, INDEX (I-type: immediate in bits 31:20)
+	 *  jal x0, riscv_vdso_lookup
+	 *  Both words carry a zero immediate here; the real ones are OR-ed in below.
+	 */
+	uint32_t trampoline_call[2] = {
+		0x00000293,
+		0x0000006f,
+	};
+	const size_t insts_len = ARRAY_SIZE(trampoline_call);
+	uint32_t *call_addr = (uint32_t *)from;
+	// Offset from the jal instruction to the lookup trampoline.
+	ssize_t trampoline_offset = trampoline - (from + sizeof(uint32_t));
+
+	trampoline_call[0] = trampoline_call[0] | (idx << 20);
+	trampoline_call[1] = trampoline_call[1] | riscv_j_imm(trampoline_offset);
+
+	for (unsigned int i = 0; i < insts_len; i++) {
+		call_addr[i] = trampoline_call[i];
+	}
+
+	// Set the lookup table pointer for this vdso symbol.
+	lookup_table = (uint64_t *)(trampoline + trampoline_size);
+	lookup_table[idx] = to;
+}
+
+int vdso_redirect_calls(uint64_t base_to, uint64_t base_from, struct vdso_symtable *to, struct vdso_symtable *from,
+			bool __always_unused compat_vdso)
+{
+	uint64_t lookup_code;
+	unsigned int sym, slot;
+
+	/* Install the shared lookup stub first; without it no symbol can be redirected. */
+	lookup_code = put_trampoline(base_from, from);
+	if (lookup_code == 0)
+		return 1;
+
+	/* slot counts only the symbols that are present, so table entries are used densely. */
+	for (sym = 0, slot = 0; sym < ARRAY_SIZE(to->symbols); sym++) {
+		if (vdso_symbol_empty(&from->symbols[sym]))
+			continue;
+		pr_debug("br: %lx/%lx -> %lx/%lx (index %d) '%s'\n", base_from, from->symbols[sym].offset, base_to,
+			 to->symbols[sym].offset, sym, from->symbols[sym].name);
+		put_trampoline_call(base_from + from->symbols[sym].offset, base_to + to->symbols[sym].offset,
+				    lookup_code, slot++);
+	}
+
+	invalidate_caches();
+
+	return 0;
+}
\ No newline at end of file
diff --git a/criu/arch/s390/cpu.c b/criu/arch/s390/cpu.c
index 3f430f455..e227fad5e 100644
--- a/criu/arch/s390/cpu.c
+++ b/criu/arch/s390/cpu.c
@@ -87,6 +87,12 @@ int cpu_validate_cpuinfo(void)
 	if (!img)
 		return -1;
 
+	if (empty_image(img)) {
+		pr_err("No cpuinfo image\n");
+		close_image(img);
+		return -1;
+	}
+
 	ret = 0;
 	if (pb_read_one(img, &cpu_info, PB_CPUINFO) < 0)
 		goto error;
diff --git a/criu/arch/s390/crtools.c b/criu/arch/s390/crtools.c
index 5cf160d82..e08c83878 100644
--- a/criu/arch/s390/crtools.c
+++ b/criu/arch/s390/crtools.c
@@ -142,6 +142,29 @@ static void print_core_fp_regs(const char *msg, CoreEntry *core)
 	print_core_ri_cb(core);
 }
 
+/*
+ * Allocate and initialise a floating point register entry with room for
+ * 16 zeroed FPRs. Returns NULL if either allocation fails.
+ */
+static UserS390FpregsEntry *allocate_fp_regs(void)
+{
+	UserS390FpregsEntry *entry = xmalloc(sizeof(*entry));
+
+	if (entry == NULL)
+		return NULL;
+	user_s390_fpregs_entry__init(entry);
+
+	entry->n_fprs = 16;
+	entry->fprs = xzalloc(16 * sizeof(uint64_t));
+	if (entry->fprs == NULL) {
+		/* Do not leak the half-built entry. */
+		xfree(entry);
+		entry = NULL;
+	}
+
+	return entry;
+}
+
 /*
  * Allocate VxrsLow registers
  */
@@ -282,7 +305,7 @@ static void free_ri_cb(UserS390RiEntry *ri_cb)
 /*
  * Copy internal structures into Google Protocol Buffers
  */
-int save_task_regs(void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f)
+int save_task_regs(pid_t pid, void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f)
 {
 	UserS390VxrsHighEntry *vxrs_high = NULL;
 	UserS390VxrsLowEntry *vxrs_low = NULL;
@@ -294,7 +317,13 @@ int save_task_regs(void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f)
 	CoreEntry *core = arg;
 
 	gpregs = CORE_THREAD_ARCH_INFO(core)->gpregs;
-	fpregs = CORE_THREAD_ARCH_INFO(core)->fpregs;
+	/*
+	 * We delay allocating this until now because checkpointing can fail earlier.
+	 * When it fails we need to know if we reached here or not so that the cleanup
+	 * code doesn't restore FPRs that were never saved in the first place.
+	 */
+	fpregs = allocate_fp_regs();
+	CORE_THREAD_ARCH_INFO(core)->fpregs = fpregs;
 
 	/* Vector registers */
 	if (f->flags & USER_FPREGS_VXRS) {
@@ -399,36 +428,15 @@ int restore_fpu(struct rt_sigframe *f, CoreEntry *core)
 	return 0;
 }
 
-/*
- * Allocate floating point registers
- */
-static UserS390FpregsEntry *allocate_fp_regs(void)
-{
-	UserS390FpregsEntry *fpregs;
-
-	fpregs = xmalloc(sizeof(*fpregs));
-	if (!fpregs)
-		return NULL;
-	user_s390_fpregs_entry__init(fpregs);
-
-	fpregs->n_fprs = 16;
-	fpregs->fprs = xzalloc(16 * sizeof(uint64_t));
-	if (!fpregs->fprs)
-		goto fail_free_fpregs;
-	return fpregs;
-
-fail_free_fpregs:
-	xfree(fpregs);
-	return NULL;
-}
-
 /*
  * Free floating point registers
  */
 static void free_fp_regs(UserS390FpregsEntry *fpregs)
 {
-	xfree(fpregs->fprs);
-	xfree(fpregs);
+	if (fpregs) {
+		xfree(fpregs->fprs);
+		xfree(fpregs);
+	}
 }
 
 /*
@@ -487,15 +495,17 @@ int arch_alloc_thread_info(CoreEntry *core)
 	ti_s390->gpregs = allocate_gp_regs();
 	if (!ti_s390->gpregs)
 		goto fail_free_ti_s390;
-	ti_s390->fpregs = allocate_fp_regs();
-	if (!ti_s390->fpregs)
-		goto fail_free_gp_regs;
+
+	/*
+	 * Delay allocating space until needed. Checkpointing can fail before that
+	 * and the cleanup code needs to be able to tell if FPRs were saved or not
+	 * before trying to restore the register state.
+	 */
+	ti_s390->fpregs = NULL;
 
 	CORE_THREAD_ARCH_INFO(core) = ti_s390;
 	return 0;
 
-fail_free_gp_regs:
-	free_gp_regs(ti_s390->gpregs);
 fail_free_ti_s390:
 	xfree(ti_s390);
 	return -1;
@@ -678,14 +688,18 @@ static int set_task_regs(pid_t pid, CoreEntry *core)
 	user_fpregs_struct_t fpregs;
 
 	memset(&fpregs, 0, sizeof(fpregs));
-	/* Floating point registers */
+	/*
+	 * Floating point registers
+	 * Optional on checkpoint; checkpoint may have failed and we may reach here as part of cleanup
+	 * so there's no guarantee that we saved FPRs for this thread.
+	 */
 	cfpregs = CORE_THREAD_ARCH_INFO(core)->fpregs;
-	if (!cfpregs)
-		return -1;
-	fpregs.prfpreg.fpc = cfpregs->fpc;
-	memcpy(fpregs.prfpreg.fprs, cfpregs->fprs, sizeof(fpregs.prfpreg.fprs));
-	if (set_fp_regs(pid, &fpregs) < 0)
-		return -1;
+	if (cfpregs) {
+		fpregs.prfpreg.fpc = cfpregs->fpc;
+		memcpy(fpregs.prfpreg.fprs, cfpregs->fprs, sizeof(fpregs.prfpreg.fprs));
+		if (set_fp_regs(pid, &fpregs) < 0)
+			return -1;
+	}
 	/* Vector registers (optional) */
 	cvxrs_low = CORE_THREAD_ARCH_INFO(core)->vxrs_low;
 	if (cvxrs_low != NULL) {
diff --git a/criu/arch/s390/include/asm/dump.h b/criu/arch/s390/include/asm/dump.h
index c200724d7..5a24c5b3d 100644
--- a/criu/arch/s390/include/asm/dump.h
+++ b/criu/arch/s390/include/asm/dump.h
@@ -1,7 +1,7 @@
 #ifndef __CR_ASM_DUMP_H__
 #define __CR_ASM_DUMP_H__
 
-int save_task_regs(void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f);
+int save_task_regs(pid_t pid, void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f);
 int arch_alloc_thread_info(CoreEntry *core);
 void arch_free_thread_info(CoreEntry *core);
 
diff --git a/criu/arch/x86/Makefile b/criu/arch/x86/Makefile
index 618e85bb3..46f00e9e9 100644
--- a/criu/arch/x86/Makefile
+++ b/criu/arch/x86/Makefile
@@ -9,6 +9,7 @@ obj-y			+= cpu.o
 obj-y			+= crtools.o
 obj-y			+= kerndat.o
 obj-y			+= sigframe.o
+obj-y			+= shstk.o
 ifeq ($(CONFIG_COMPAT),y)
         obj-y		+= sigaction_compat.o
 endif
diff --git a/criu/arch/x86/cpu.c b/criu/arch/x86/cpu.c
index dfa31569f..2e1f2de9a 100644
--- a/criu/arch/x86/cpu.c
+++ b/criu/arch/x86/cpu.c
@@ -407,6 +407,12 @@ int cpu_validate_cpuinfo(void)
 	if (!img)
 		return -1;
 
+	if (empty_image(img)) {
+		pr_err("No cpuinfo image\n");
+		close_image(img);
+		return -1;
+	}
+
 	if (pb_read_one(img, &img_cpu_info, PB_CPUINFO) < 0)
 		goto err;
 
diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c
index d10e51e48..1f4d0736b 100644
--- a/criu/arch/x86/crtools.c
+++ b/criu/arch/x86/crtools.c
@@ -15,7 +15,7 @@
 
 #define XSAVE_PB_NELEMS(__s, __obj, __member) (sizeof(__s) / sizeof(*(__obj)->__member))
 
-int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
+int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
 {
 	CoreEntry *core = x;
 	UserX86RegsEntry *gpregs = core->thread_info->gpregs;
@@ -133,6 +133,14 @@ int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpre
 #undef assign_array
 #undef assign_xsave
 
+	if (compel_cpu_has_feature(X86_FEATURE_SHSTK)) {
+		UserX86CetEntry *cet = core->thread_info->fpregs->xsave->cet;
+		struct cet_user_state *regs = &fpregs->cet;
+
+		cet->cet = regs->cet;
+		cet->ssp = regs->ssp;
+	}
+
 	return 0;
 }
 
@@ -199,6 +207,13 @@ static int alloc_xsave_extends(UserX86XsaveEntry *xsave)
 			goto err;
 	}
 
+	if (compel_cpu_has_feature(X86_FEATURE_SHSTK)) {
+		xsave->cet = xzalloc(sizeof(UserX86CetEntry));
+		if (!xsave->cet)
+			goto err;
+		user_x86_cet_entry__init(xsave->cet);
+	}
+
 	return 0;
 err:
 	return -1;
@@ -220,6 +235,8 @@ int arch_alloc_thread_info(CoreEntry *core)
 		with_xsave = compel_cpu_has_feature(X86_FEATURE_OSXSAVE);
 		if (with_xsave)
 			sz += sizeof(UserX86XsaveEntry);
+		if (compel_cpu_has_feature(X86_FEATURE_SHSTK))
+			sz += sizeof(UserX86CetEntry);
 	}
 
 	m = xmalloc(sz);
@@ -433,7 +450,7 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core)
 #define assign_array(dst, src, e) memcpy(dst.e, (src)->e, sizeof(dst.e))
 #define assign_xsave(feature, xsave, member, area)                                                                \
 	do {                                                                                                      \
-		if (compel_fpu_has_feature(feature)) {                                                            \
+		if (compel_fpu_has_feature(feature) && (xsave->xstate_bv & (1UL << feature))) {                   \
 			uint32_t off = compel_fpu_feature_offset(feature);                                        \
 			void *to = &area[off];                                                                    \
 			void *from = xsave->member;                                                               \
diff --git a/criu/arch/x86/include/asm/compat.h b/criu/arch/x86/include/asm/compat.h
index 867357fa2..4ca704fd7 100644
--- a/criu/arch/x86/include/asm/compat.h
+++ b/criu/arch/x86/include/asm/compat.h
@@ -11,6 +11,8 @@
 
 #include <sys/mman.h>
 
+#include "log.h"
+
 static inline void *alloc_compat_syscall_stack(void)
 {
 	void *mem = (void *)sys_mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE,
diff --git a/criu/arch/x86/include/asm/dump.h b/criu/arch/x86/include/asm/dump.h
index 192f6bd02..925ea91ff 100644
--- a/criu/arch/x86/include/asm/dump.h
+++ b/criu/arch/x86/include/asm/dump.h
@@ -1,7 +1,7 @@
 #ifndef __CR_ASM_DUMP_H__
 #define __CR_ASM_DUMP_H__
 
-extern int save_task_regs(void *, user_regs_struct_t *, user_fpregs_struct_t *);
+extern int save_task_regs(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *);
 extern int arch_alloc_thread_info(CoreEntry *core);
 extern void arch_free_thread_info(CoreEntry *core);
 extern int get_task_futex_robust_list_compat(pid_t pid, ThreadCoreEntry *info);
diff --git a/criu/arch/x86/include/asm/kerndat.h b/criu/arch/x86/include/asm/kerndat.h
index 903bc80f7..5c3717230 100644
--- a/criu/arch/x86/include/asm/kerndat.h
+++ b/criu/arch/x86/include/asm/kerndat.h
@@ -4,5 +4,6 @@
 extern int kdat_compatible_cr(void);
 extern int kdat_can_map_vdso(void);
 extern int kdat_x86_has_ptrace_fpu_xsave_bug(void);
+extern int kdat_has_shstk(void);
 
 #endif /* __CR_ASM_KERNDAT_H__ */
diff --git a/criu/arch/x86/include/asm/restorer.h b/criu/arch/x86/include/asm/restorer.h
index f7a6d5058..3a673958d 100644
--- a/criu/arch/x86/include/asm/restorer.h
+++ b/criu/arch/x86/include/asm/restorer.h
@@ -8,6 +8,7 @@
 #include <compel/plugins/std/syscall-codes.h>
 #include <compel/asm/sigframe.h>
 #include "asm/compat.h"
+#include "asm/shstk.h"
 
 #ifdef CONFIG_COMPAT
 extern void restore_tls(tls_t *ptls);
diff --git a/criu/arch/x86/include/asm/shstk.h b/criu/arch/x86/include/asm/shstk.h
new file mode 100644
index 000000000..d113fd8ab
--- /dev/null
+++ b/criu/arch/x86/include/asm/shstk.h
@@ -0,0 +1,304 @@
+#ifndef __CR_ASM_SHSTK_H__
+#define __CR_ASM_SHSTK_H__
+
+/*
+ * Shadow stack constants from Linux
+ */
+/* arch/x86/include/uapi/asm/mman.h */
+#ifndef SHADOW_STACK_SET_TOKEN
+#define SHADOW_STACK_SET_TOKEN 0x1     /* Set up a restore token in the shadow stack */
+#endif
+
+/* arch/x86/include/uapi/asm/prctl.h */
+#define ARCH_SHSTK_ENABLE	0x5001
+#define ARCH_SHSTK_DISABLE	0x5002
+#define ARCH_SHSTK_LOCK		0x5003
+#define ARCH_SHSTK_UNLOCK	0x5004
+#define ARCH_SHSTK_STATUS	0x5005
+
+#define ARCH_SHSTK_SHSTK	(1ULL << 0)
+#define ARCH_SHSTK_WRSS		(1ULL << 1)
+
+#define ARCH_HAS_SHSTK
+
+/* from arch/x86/kernel/shstk.c */
+#define SHSTK_DATA_BIT (1UL << 63)	/* BIT(63) */
+
+/*
+ * Shadow stack memory cannot be restored with memcpy/pread but only using
+ * a special instruction that can write to shadow stack.
+ * That instruction is only available when shadow stack is enabled,
+ * otherwise it causes #UD.
+ *
+ * Also, shadow stack VMAs cannot be mmap()ed or mremap()ed, they must be
+ * created using map_shadow_stack() system call. This pushes creation of
+ * shadow stack VMAs to the restorer blob after CRIU mappings are freed.
+ *
+ * And there is additional juggling with shadow stacks to ensure that we
+ * don't unmap an active shadow stack.
+ *
+ * The overall sequence of restoring shadow stack is
+ * - Enable shadow stack early after clone()ing the task
+ * - Unlock shadow stack features using ptrace
+ * - In the restorer blob:
+ *   - switch to a temporary shadow stack to be able to unmap shadow stack
+ *     with the CRIU mappings
+ *   - after memory mappings are restored, recreate shadow stack VMAs,
+ *     populate them using wrss instruction and switch to the task shadow
+ *     stack
+ *   - lock shadow stack features
+ */
+struct rst_shstk_info {
+	unsigned long vma_start;	/* start of shadow stack VMA */
+	unsigned long vma_size;		/* size of shadow stack VMA */
+	unsigned long premmaped_addr;	/* address of shadow stack copy in
+					   the premmaped area */
+	unsigned long tmp_shstk;	/* address of temporary shadow stack */
+	u64 ssp;			/* shadow stack pointer */
+	u64 cet;			/* CET control state (tested against ARCH_SHSTK_* bits) */
+};
+#define rst_shstk_info rst_shstk_info
+
+struct task_restore_args;
+struct pstree_item;
+
+int arch_shstk_prepare(struct pstree_item *item, CoreEntry *core,
+		       struct task_restore_args *ta);
+#define arch_shstk_prepare arch_shstk_prepare
+
+int arch_shstk_unlock(struct pstree_item *item, CoreEntry *core, pid_t pid);
+#define arch_shstk_unlock arch_shstk_unlock
+
+int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
+		      int (*func)(void *arg), void *arg);
+#define arch_shstk_trampoline arch_shstk_trampoline
+
+static always_inline long shstk_restorer_stack_size(void)
+{
+	return PAGE_SIZE;
+}
+#define shstk_restorer_stack_size shstk_restorer_stack_size
+static always_inline void shstk_set_restorer_stack(struct rst_shstk_info *info, void *ptr)
+{
+	info->tmp_shstk = (unsigned long)ptr;
+}
+#define shstk_set_restorer_stack shstk_set_restorer_stack
+
+static always_inline long shstk_min_mmap_addr(struct rst_shstk_info *info, unsigned long __maybe_unused def)
+{
+	return (info->cet & ARCH_SHSTK_SHSTK) ? (4UL << 30) : def; /* 4G floor when shadow stack is on */
+}
+#define shstk_min_mmap_addr shstk_min_mmap_addr
+
+#ifdef CR_NOGLIBC
+
+#include <compel/plugins/std/syscall.h>
+#include <compel/cpu.h>
+#include "vma.h"
+
+#define SHSTK_BUSY_BIT (1UL << 0)	/* BIT(0) */
+
+static inline int shstk_map(unsigned long addr, unsigned long size)
+{
+	/* ask the kernel for a shadow stack at addr, with a restore token */
+	long mapped = sys_map_shadow_stack(addr, size, SHADOW_STACK_SET_TOKEN);
+	int err = -1;
+
+	if (mapped < 0)
+		pr_err("Failed to map shadow stack at %lx: %ld\n", addr, mapped);
+	else if (mapped != addr)
+		pr_err("Shadow stack address mismatch: need %lx, got %lx\n", addr, mapped);
+	else
+		err = 0;
+
+	if (!err)
+		pr_info("Created shadow stack at %lx\n", mapped);
+
+	return err;
+}
+
+/* clang-format off */
+static inline unsigned long get_ssp(void)
+{
+	unsigned long ssp = 0; /* rdsspq is a NOP when shadow stack is off, so preset the result */
+
+	asm volatile("rdsspq %0" : "+r"(ssp));
+
+	return ssp;
+}
+
+static inline void wrssq(unsigned long addr, unsigned long val)
+{
+	asm volatile("wrssq %1, (%0)" :: "r"(addr), "r"(val) : "memory");
+}
+/* clang-format off */
+
+static always_inline void shstk_switch_ssp(unsigned long new_ssp)
+{
+	unsigned long old_ssp = get_ssp();
+
+	asm volatile("rstorssp (%0)\n" :: "r"(new_ssp));
+	asm volatile("saveprevssp");
+
+	pr_debug("changed ssp from %lx to %lx\n", old_ssp, new_ssp);
+}
+
+/*
+ * Turn off WRSS (writes to the shadow stack), then lock the shadow stack
+ * enable/disable control. Returns 0 on success or the failing prctl result.
+ */
+static inline int shstk_finalize(void)
+{
+	int err = sys_arch_prctl(ARCH_SHSTK_DISABLE, ARCH_SHSTK_WRSS);
+
+	if (err) {
+		pr_err("Failed to disable writes to shadow stack\n");
+		return err;
+	}
+
+	err = sys_arch_prctl(ARCH_SHSTK_LOCK, ARCH_SHSTK_SHSTK);
+	if (err)
+		pr_err("Failed to lock shadow stack controls\n");
+
+	return err;
+}
+
+/*
+ * Create a shadow stack VMA and fill it from the premapped anonymous (non-shstk) copy
+ */
+static always_inline int shstk_vma_restore(VmaEntry *vma_entry)
+{
+	long shstk, i;
+	unsigned long *shstk_data = (void *)vma_premmaped_start(vma_entry);
+	unsigned long vma_size = vma_entry_len(vma_entry);
+	long ret;
+
+	shstk = sys_map_shadow_stack(0, vma_size, SHADOW_STACK_SET_TOKEN);
+	if (shstk < 0) {
+		pr_err("Failed to map shadow stack: %ld\n", shstk);
+		return -1;
+	}
+
+	/* copy the saved contents 8 bytes at a time using wrssq */
+	for (i = 0; i < vma_size / 8; i++)
+		wrssq(shstk + i * 8, shstk_data[i]);
+
+	ret = sys_munmap(shstk_data, vma_size);
+	if (ret < 0) {
+		pr_err("Failed to unmap premmaped shadow stack\n");
+		return ret;
+	}
+
+	/*
+	 * From this point on the premapped VMA is the new shadow stack
+	 * (shstk) and we need to mremap() it to the final location. The
+	 * original premapped area (shstk_data) has been unmapped already.
+	 */
+	vma_premmaped_start(vma_entry) = shstk;
+
+	return 0;
+}
+#define shstk_vma_restore shstk_vma_restore
+
+/*
+ * Restore contents of the shadow stack and set shadow stack pointer
+ */
+static always_inline int shstk_restore(struct rst_shstk_info *cet)
+{
+	unsigned long ssp, val;
+
+	if (!(cet->cet & ARCH_SHSTK_SHSTK))
+		return 0;
+
+	/*
+	 * Add tokens for sigreturn frame and for switch of the shadow stack.
+	 * The sigreturn token will be checked by the kernel during
+	 * processing of sigreturn
+	 * The token for stack switch is required by rstorssp and
+	 * saveprevssp semantics
+	 */
+
+	/* token for sigreturn frame */
+	ssp = cet->ssp - 8;
+	val = ALIGN_DOWN(cet->ssp, 8) | SHSTK_DATA_BIT;
+	wrssq(ssp, val);
+
+	/* shadow stack switch token */
+	val = ssp | SHSTK_BUSY_BIT;
+	ssp -= 8;
+	wrssq(ssp, val);
+
+	/* reset shadow stack pointer to the proper location */
+	shstk_switch_ssp(ssp);
+
+	return shstk_finalize();
+}
+#define arch_shstk_restore shstk_restore
+
+/*
+ * Disable WRSS and shadow stack, then lock the control; 0 or the prctl error
+ */
+static inline int shstk_disable(void)
+{
+	int ret;
+
+	ret = sys_arch_prctl(ARCH_SHSTK_DISABLE, ARCH_SHSTK_WRSS);
+	if (ret) {
+		pr_err("Failed to disable writes to shadow stack\n");
+		return ret;
+	}
+
+	ret = sys_arch_prctl(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK);
+	if (ret) {
+		pr_err("Failed to disable shadow stack\n");
+		return ret;
+	}
+
+	ret = sys_arch_prctl(ARCH_SHSTK_LOCK, ARCH_SHSTK_SHSTK);
+	if (ret)
+		pr_err("Failed to lock shadow stack controls\n");
+
+	return ret; /* a failed lock is an error too, as in shstk_finalize() */
+}
+
+/*
+ * Switch to temporary shadow stack
+ */
+static always_inline int shstk_switch_to_restorer(struct rst_shstk_info *cet)
+{
+	unsigned long ssp;
+	long ret;
+
+	if (!(cet->cet & ARCH_SHSTK_SHSTK))
+		return 0;
+
+	ret = sys_munmap((void *)cet->tmp_shstk, PAGE_SIZE);
+	if (ret < 0) {
+		pr_err("Failed to unmap area for temporary shadow stack\n");
+		return -1;
+	}
+
+	ret = shstk_map(cet->tmp_shstk, PAGE_SIZE);
+	if (ret < 0)
+		return -1;
+
+	/*
+	 * Switch shadow stack from the default created by the kernel to a
+	 * temporary shadow stack allocated in the premmaped area
+	 */
+	ssp = cet->tmp_shstk + PAGE_SIZE - 8;
+	shstk_switch_ssp(ssp);
+
+	ret = sys_arch_prctl(ARCH_SHSTK_ENABLE, ARCH_SHSTK_WRSS);
+	if (ret) {
+		pr_err("Failed to enable writes to shadow stack\n");
+		return ret;
+	}
+
+	return 0;
+}
+#define arch_shstk_switch_to_restorer shstk_switch_to_restorer
+
+#endif /* CR_NOGLIBC */
+
+#endif /* __CR_ASM_SHSTK_H__ */
diff --git a/criu/arch/x86/include/asm/vdso.h b/criu/arch/x86/include/asm/vdso.h
index 3b3f292bd..ca46374a5 100644
--- a/criu/arch/x86/include/asm/vdso.h
+++ b/criu/arch/x86/include/asm/vdso.h
@@ -12,7 +12,7 @@
  * This is a minimal amount of symbols
  * we should support at the moment.
  */
-#define VDSO_SYMBOL_MAX	 6
+#define VDSO_SYMBOL_MAX	 7
 #define VDSO_SYMBOL_GTOD 2
 
 /*
@@ -42,11 +42,12 @@
 	const char *aarch_vdso_symbol3 = "__vdso_gettimeofday";  \
 	const char *aarch_vdso_symbol4 = "__vdso_time";          \
 	const char *aarch_vdso_symbol5 = "__kernel_sigreturn";   \
-	const char *aarch_vdso_symbol6 = "__kernel_rt_sigreturn";
+	const char *aarch_vdso_symbol6 = "__kernel_rt_sigreturn"; \
+	const char *aarch_vdso_symbol7 = "__vdso_clock_gettime64"; \
 
 #define ARCH_VDSO_SYMBOLS                                                                                   \
 	aarch_vdso_symbol1, aarch_vdso_symbol2, aarch_vdso_symbol3, aarch_vdso_symbol4, aarch_vdso_symbol5, \
-		aarch_vdso_symbol6
+		aarch_vdso_symbol6, aarch_vdso_symbol7
 
 /*	"__kernel_vsyscall",			*/
 
diff --git a/criu/arch/x86/kerndat.c b/criu/arch/x86/kerndat.c
index a98797d39..3a58bbea7 100644
--- a/criu/arch/x86/kerndat.c
+++ b/criu/arch/x86/kerndat.c
@@ -17,6 +17,7 @@
 
 #include "asm/compat.h"
 #include "asm/dump.h"
+#include "asm/shstk.h"
 
 int kdat_can_map_vdso(void)
 {
@@ -251,3 +252,29 @@ out_kill:
 
 	return ret;
 }
+
+/*
+ * Unlike most kerndat knobs, this does not check for availability of the
+ * shadow stack in the kernel, but rather checks if criu runs with shadow
+ * stack enabled. This depends on hardware availability, kernel and glibc
+ * support, compiler options and glibc tunables.
+ * Returns 0 if shadow stack is unavailable or off, 1 if it is on. Any other
+ * prctl failure is logged and also returns 1 - NOTE(review): confirm intent.
+ */
+int kdat_has_shstk(void)
+{
+	unsigned long features;
+
+	if (!compel_cpu_has_feature(X86_FEATURE_SHSTK))
+		return 0;
+
+	if (syscall(__NR_arch_prctl, ARCH_SHSTK_STATUS, &features)) {
+		/* kernels that don't support shadow stack return -EINVAL */
+		if (errno == EINVAL)
+			return 0;
+		pr_perror("Cannot get shadow stack status");
+		return 1;
+	}
+
+	return !!(features & ARCH_SHSTK_SHSTK);
+}
diff --git a/criu/arch/x86/shstk.c b/criu/arch/x86/shstk.c
new file mode 100644
index 000000000..0810efac5
--- /dev/null
+++ b/criu/arch/x86/shstk.c
@@ -0,0 +1,222 @@
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+
+#include <common/list.h>
+
+#include <compel/cpu.h>
+
+#include "pstree.h"
+#include "restorer.h"
+#include "rst-malloc.h"
+#include "vma.h"
+
+static bool task_needs_shstk(struct pstree_item *item, CoreEntry *core)
+{
+	UserX86XsaveEntry *xsave;
+
+	if (!task_alive(item))
+		return false;
+
+	/* no CET state in the image means no shadow stack to restore */
+	xsave = core->thread_info->fpregs->xsave;
+	if (!xsave || !xsave->cet)
+		return false;
+
+	if (!compel_cpu_has_feature(X86_FEATURE_SHSTK)) {
+		pr_warn_once("Restoring task with shadow stack on non-CET machine\n");
+		return false;
+	}
+
+	/* the saved CET state must have the shadow stack feature bit set */
+	return !!(xsave->cet->cet & ARCH_SHSTK_SHSTK);
+}
+
+static int shstk_prepare_task(struct vm_area_list *vmas,
+			      struct rst_shstk_info *shstk)
+{
+	struct vma_area *area;
+
+	/* record the first shadow stack VMA that contains the saved ssp */
+	list_for_each_entry(area, &vmas->h, list) {
+		if (!vma_area_is(area, VMA_AREA_SHSTK))
+			continue;
+		if (!in_vma_area(area, shstk->ssp))
+			continue;
+
+		shstk->vma_start = area->e->start;
+		shstk->vma_size = vma_area_len(area);
+		shstk->premmaped_addr = area->premmaped_addr;
+		break;
+	}
+
+	/* always succeeds; the fields are left untouched when nothing matches */
+	return 0;
+}
+
+int arch_shstk_prepare(struct pstree_item *item, CoreEntry *core,
+		       struct task_restore_args *ta)
+{
+	struct thread_restore_args *args_array = (struct thread_restore_args *)(&ta[1]); /* thread args start right after *ta */
+	UserX86FpregsEntry *fpregs = core->thread_info->fpregs;
+	struct vm_area_list *vmas = &rsti(item)->vmas;
+	struct rst_shstk_info *shstk = &ta->shstk;
+	int i;
+
+	if (!task_needs_shstk(item, core)) /* decided from the core passed in */
+		return 0;
+
+	shstk->cet = fpregs->xsave->cet->cet;
+	shstk->ssp = fpregs->xsave->cet->ssp;
+
+	if (shstk_prepare_task(vmas, shstk)) {
+		pr_err("Failed to prepare shadow stack memory\n");
+		return -1;
+	}
+
+	for (i = 0; i < item->nr_threads; i++) { /* same setup for each thread's restore args */
+		struct thread_restore_args *thread_args = &args_array[i];
+
+		core = item->core[i];
+		fpregs = core->thread_info->fpregs;
+		shstk = &thread_args->shstk;
+
+		shstk->cet = fpregs->xsave->cet->cet; /* NOTE(review): assumes each thread core has xsave->cet - confirm */
+		shstk->ssp = fpregs->xsave->cet->ssp;
+		if (shstk_prepare_task(vmas, shstk)) {
+			pr_err("Failed to prepare shadow stack memory\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+int arch_shstk_unlock(struct pstree_item *item, CoreEntry *core, pid_t pid)
+{
+	unsigned long features;
+	int status;
+	int ret = -1; /* stays -1 unless every ptrace step below succeeds */
+
+	/*
+	 * Unlock the shadow stack features of @pid via ptrace. Nothing to do when
+	 * CRIU runs with no shadow stack and the task does not need one.
+	 */
+	if (!kdat.has_shstk && !task_needs_shstk(item, core))
+		return 0;
+
+	futex_wait_until(&rsti(item)->shstk_enable, 1);
+
+	if (ptrace(PTRACE_SEIZE, pid, 0, 0)) {
+		pr_perror("Cannot attach to %d", pid);
+		goto futex_wake;
+	}
+
+	if (ptrace(PTRACE_INTERRUPT, pid, 0, 0)) {
+		pr_perror("Cannot interrupt the %d task", pid);
+		goto detach;
+	}
+
+	if (wait4(pid, &status, __WALL, NULL) != pid) {
+		pr_perror("waitpid(%d) failed", pid);
+		goto detach;
+	}
+
+	features = ARCH_SHSTK_SHSTK | ARCH_SHSTK_WRSS;
+	if (ptrace(PTRACE_ARCH_PRCTL, pid, features, ARCH_SHSTK_UNLOCK)) {
+		pr_perror("Cannot unlock CET for %d task", pid);
+		goto detach;
+	}
+
+	ret = 0;
+
+detach:
+	if (ptrace(PTRACE_DETACH, pid, NULL, 0)) {
+		pr_perror("Unable to detach %d", pid);
+		ret = -1;
+	}
+
+futex_wake:
+	futex_set_and_wake(&rsti(item)->shstk_unlock, 1);
+
+	return ret;
+}
+
+static void shstk_sync_unlock(struct pstree_item *item)
+{
+	/* notify parent that shadow stack is enabled ... */
+	futex_set_and_wake(&rsti(item)->shstk_enable, 1);
+
+	/* ... and wait until it unlocks its features with ptrace */
+	futex_wait_until(&rsti(item)->shstk_unlock, 1);
+}
+
+static void __arch_shstk_enable(struct pstree_item *item,
+				int (*func)(void *arg), void *arg)
+{
+	int ret;
+
+	shstk_sync_unlock(item);
+
+	/* return here would cause #CP, use exit() instead */
+	ret = func(arg);
+	exit(ret);
+}
+
+static int shstk_disable(struct pstree_item *item)
+{
+	shstk_sync_unlock(item);
+
+	/* disable shadow stack, implicitly clears ARCH_SHSTK_WRSS */
+	if (syscall(__NR_arch_prctl, ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK)) {
+		pr_perror("Failed to disable shadow stack");
+		return -1;
+	}
+
+	if (syscall(__NR_arch_prctl, ARCH_SHSTK_LOCK,
+		    ARCH_SHSTK_SHSTK | ARCH_SHSTK_WRSS)) {
+		pr_perror("Failed to lock shadow stack controls");
+		return -1;
+	}
+
+	return 0;
+}
+
+int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
+		      int (*func)(void *arg), void *arg)
+{
+	unsigned long features = ARCH_SHSTK_SHSTK;
+	int code = ARCH_SHSTK_ENABLE;
+
+	/*
+	 * If task does not need shadow stack but CRIU runs with shadow
+	 * stack enabled, we should disable it before continuing with
+	 * restore
+	 */
+	if (!task_needs_shstk(item, core)) {
+		if (kdat.has_shstk && shstk_disable(item))
+			return -1;
+		return func(arg);
+	}
+
+	/*
+	 * Calling sys_arch_prctl() means there will be use of retq
+	 * instruction after shadow stack is enabled and this will cause
+	 * Control Protection fault. Open code sys_arch_prctl() in assembly.
+	 *
+	 * code and features should be in %rdi and %rsi and will be passed to
+	 * the system call as is. The syscall instruction overwrites %rax,
+	 * %rcx and %r11, so they are declared as clobbers.
+	 */
+	asm volatile("movq $"__stringify(__NR_arch_prctl)", %%rax	\n"
+		     "syscall						\n"
+		     "cmpq $0, %%rax					\n"
+		     "je 1f						\n"
+		     "retq						\n"
+		     "1:						\n"
+		     :: "D"(code), "S"(features) : "rax", "rcx", "r11", "memory");
+
+	__arch_shstk_enable(item, func, arg);
+
+	/* never reached */
+	return -1;
+}
diff --git a/criu/arch/x86/sigframe.c b/criu/arch/x86/sigframe.c
index 4fa7eb3dc..46612e70d 100644
--- a/criu/arch/x86/sigframe.c
+++ b/criu/arch/x86/sigframe.c
@@ -23,7 +23,7 @@ int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, struct rt_sigframe *r
 		}
 
 		sigframe->native.uc.uc_mcontext.fpstate = (uint64_t)addr;
-	} else if (!sigframe->is_native) {
+	} else {
 		unsigned long addr = (unsigned long)(void *)&fpu_state->fpu_state_ia32.xsave;
 		sigframe->compat.uc.uc_mcontext.fpstate = (uint32_t)(unsigned long)(void *)&fpu_state->fpu_state_ia32;
 		if ((addr % 64ul)) {
diff --git a/criu/autofs.c b/criu/autofs.c
index c662bea60..a1775cbc9 100644
--- a/criu/autofs.c
+++ b/criu/autofs.c
@@ -431,8 +431,7 @@ static int access_autofs_mount(struct mount_info *pm)
 		pr_err("failed to fork\n");
 		goto close_autofs_mnt;
 	case 0:
-		/* We don't care about results.
-			 * All we need is to "touch" */
+		/* We don't care about results, all we need is to "touch" */
 		/* coverity[check_return] */
 		openat(autofs_mnt, mnt_path, O_RDONLY | O_NONBLOCK | O_DIRECTORY);
 		_exit(0);
@@ -659,7 +658,7 @@ static int autofs_mnt_make_catatonic(const char *mnt_path, int mnt_fd)
 
 static int autofs_mnt_set_timeout(time_t timeout, const char *mnt_path, int mnt_fd)
 {
-	pr_info("%s: set timeout %ld for %s\n", __func__, timeout, mnt_path);
+	pr_info("%s: set timeout %" PRId64 " for %s\n", __func__, (int64_t)timeout, mnt_path);
 	return autofs_ioctl(mnt_path, mnt_fd, AUTOFS_IOC_SETTIMEOUT, &timeout);
 }
 
@@ -771,7 +770,7 @@ static int autofs_post_mount(const char *mnt_path, dev_t mnt_dev, time_t timeout
 	}
 
 	if (autofs_mnt_set_timeout(timeout, mnt_path, mnt_fd)) {
-		pr_err("Failed to set timeout %ld for %s\n", timeout, mnt_path);
+		pr_err("Failed to set timeout %" PRId64 " for %s\n", (int64_t)timeout, mnt_path);
 		return -1;
 	}
 
diff --git a/criu/cgroup-props.c b/criu/cgroup-props.c
index 5bed7dd9d..1b85c5b5a 100644
--- a/criu/cgroup-props.c
+++ b/criu/cgroup-props.c
@@ -35,12 +35,29 @@ static const char *____criu_global_props____[] = {
 	"tasks",
 };
 
+/* cgroup2 global properties */
+// clang-format off
+static const char *____criu_global_props_v2____[] = {
+	"cgroup.subtree_control",
+	"cgroup.max.descendants",
+	"cgroup.max.depth",
+	"cgroup.freeze",
+	"cgroup.type",
+};
+// clang-format on
+
 cgp_t cgp_global = {
 	.name = "____criu_global_props____",
 	.nr_props = ARRAY_SIZE(____criu_global_props____),
 	.props = ____criu_global_props____,
 };
 
+cgp_t cgp_global_v2 = {
+	.name = "____criu_global_props_v2____",
+	.nr_props = ARRAY_SIZE(____criu_global_props_v2____),
+	.props = ____criu_global_props_v2____,
+};
+
 typedef struct {
 	struct list_head list;
 	cgp_t cgp;
diff --git a/criu/cgroup.c b/criu/cgroup.c
index e05b0832e..9246be639 100644
--- a/criu/cgroup.c
+++ b/criu/cgroup.c
@@ -8,6 +8,7 @@
 #include <ftw.h>
 #include <libgen.h>
 #include <sched.h>
+#include <sys/wait.h>
 
 #include "common/list.h"
 #include "xmalloc.h"
@@ -54,6 +55,7 @@ static u32 cg_set_ids = 1;
 
 static LIST_HEAD(cgroups);
 static unsigned int n_cgroups;
+static pid_t cgroupd_pid;
 
 static CgSetEntry *find_rst_set_by_id(u32 id)
 {
@@ -173,6 +175,7 @@ struct cg_controller *new_controller(const char *name)
 	nc->n_controllers = 1;
 
 	nc->n_heads = 0;
+	nc->is_threaded = false;
 	INIT_LIST_HEAD(&nc->heads);
 
 	return nc;
@@ -245,7 +248,7 @@ static int find_dir(const char *path, struct list_head *dirs, struct cgroup_dir
 			return EXACT_MATCH;
 		}
 
-		if (strstartswith(path, d->path)) {
+		if (issubpath(path, d->path)) {
 			int ret = find_dir(path, &d->children, rdir);
 			if (ret == NO_MATCH) {
 				*rdir = d;
@@ -370,7 +373,8 @@ static void free_all_cgroup_props(struct cgroup_dir *ncd)
 	ncd->n_properties = 0;
 }
 
-static int dump_cg_props_array(const char *fpath, struct cgroup_dir *ncd, const cgp_t *cgp)
+static int dump_cg_props_array(const char *fpath, struct cgroup_dir *ncd, const cgp_t *cgp,
+			       struct cg_controller *controller)
 {
 	int j;
 	char buf[PATH_MAX];
@@ -421,6 +425,14 @@ static int dump_cg_props_array(const char *fpath, struct cgroup_dir *ncd, const
 			prop->value = new;
 		}
 
+		/*
+		 * Set the is_threaded flag if cgroup.type's value is threaded
+		 * or it is a cgroup v1 (it has a 'tasks' property).
+		 * Ignore all other values.
+		 */
+		if ((!strcmp("cgroup.type", prop->name) && !strcmp("threaded", prop->value)) || !strcmp("tasks", prop->name))
+			controller->is_threaded = true;
+
 		pr_info("Dumping value %s from %s/%s\n", prop->value, fpath, prop->name);
 		list_add_tail(&prop->list, &ncd->properties);
 		ncd->n_properties++;
@@ -436,12 +448,20 @@ static int add_cgroup_properties(const char *fpath, struct cgroup_dir *ncd, stru
 	for (i = 0; i < controller->n_controllers; ++i) {
 		const cgp_t *cgp = cgp_get_props(controller->controllers[i]);
 
-		if (dump_cg_props_array(fpath, ncd, cgp) < 0) {
+		if (dump_cg_props_array(fpath, ncd, cgp, controller) < 0) {
 			pr_err("dumping known properties failed\n");
 			return -1;
 		}
+	}
 
-		if (dump_cg_props_array(fpath, ncd, &cgp_global) < 0) {
+	/* cgroup v2 */
+	if (controller->controllers[0][0] == 0) {
+		if (dump_cg_props_array(fpath, ncd, &cgp_global_v2, controller) < 0) {
+			pr_err("dumping global properties v2 failed\n");
+			return -1;
+		}
+	} else {
+		if (dump_cg_props_array(fpath, ncd, &cgp_global, controller) < 0) {
 			pr_err("dumping global properties failed\n");
 			return -1;
 		}
@@ -560,14 +580,15 @@ static int __new_open_cgroupfs(struct cg_ctl *cc)
 	int fsfd, fd;
 	char *name;
 
-	fsfd = sys_fsopen(fstype, 0);
+	fsfd = cr_fsopen(fstype, 0);
 	if (fsfd < 0) {
 		pr_perror("Unable to open the cgroup file system");
 		return -1;
 	}
 
 	if (strstartswith(cc->name, namestr)) {
-		if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "name", cc->name + strlen(namestr), 0)) {
+		if (cr_fsconfig(fsfd, FSCONFIG_SET_STRING, "name", cc->name + strlen(namestr), 0)) {
+			fsfd_dump_messages(fsfd);
 			pr_perror("Unable to configure the cgroup (%s) file system", cc->name);
 			goto err;
 		}
@@ -575,7 +596,8 @@ static int __new_open_cgroupfs(struct cg_ctl *cc)
 		char *saveptr = NULL, *buf = strdupa(cc->name);
 		name = strtok_r(buf, ",", &saveptr);
 		while (name) {
-			if (sys_fsconfig(fsfd, FSCONFIG_SET_FLAG, name, NULL, 0)) {
+			if (cr_fsconfig(fsfd, FSCONFIG_SET_FLAG, name, NULL, 0)) {
+				fsfd_dump_messages(fsfd);
 				pr_perror("Unable to configure the cgroup (%s) file system", name);
 				goto err;
 			}
@@ -583,14 +605,17 @@ static int __new_open_cgroupfs(struct cg_ctl *cc)
 		}
 	}
 
-	if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)) {
+	if (cr_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)) {
+		fsfd_dump_messages(fsfd);
 		pr_perror("Unable to create the cgroup (%s) file system", cc->name);
 		goto err;
 	}
 
-	fd = sys_fsmount(fsfd, 0, 0);
-	if (fd < 0)
+	fd = cr_fsmount(fsfd, 0, 0);
+	if (fd < 0) {
+		fsfd_dump_messages(fsfd);
 		pr_perror("Unable to mount the cgroup (%s) file system", cc->name);
+	}
 	close(fsfd);
 
 	return fd;
@@ -619,8 +644,8 @@ static int open_cgroupfs(struct cg_ctl *cc)
 		return -1;
 	}
 
-	if (mount("none", prefix, fstype, 0, mopts) < 0) {
-		pr_perror("Unable to mount %s", mopts);
+	if (mount("none", prefix, fstype, 0, mopts[0] ? mopts : NULL) < 0) {
+		pr_perror("Unable to mount %s %s", fstype, mopts);
 		rmdir(prefix);
 		return -1;
 	}
@@ -694,6 +719,8 @@ static int collect_cgroups(struct list_head *ctls)
 			}
 		} else {
 			fd = open_cgroupfs(cc);
+			if (fd < 0)
+				return -1;
 		}
 
 		path_pref_len = snprintf(path, PATH_MAX, "/proc/self/fd/%d", fd);
@@ -726,20 +753,28 @@ static int collect_cgroups(struct list_head *ctls)
 	return 0;
 }
 
-int dump_task_cgroup(struct pstree_item *item, u32 *cg_id, struct parasite_dump_cgroup_args *args)
+int dump_thread_cgroup(const struct pstree_item *item, u32 *cg_id, struct parasite_dump_cgroup_args *args, int id)
 {
-	int pid;
+	int pid, tid;
 	LIST_HEAD(ctls);
 	unsigned int n_ctls = 0;
 	struct cg_set *cs;
 
+	if (opts.unprivileged)
+		return 0;
+
 	if (item)
 		pid = item->pid->real;
 	else
 		pid = getpid();
 
-	pr_info("Dumping cgroups for %d\n", pid);
-	if (parse_task_cgroup(pid, args, &ctls, &n_ctls))
+	if (id < 0)
+		tid = pid;
+	else
+		tid = item->threads[id].real;
+
+	pr_info("Dumping cgroups for thread %d\n", tid);
+	if (parse_thread_cgroup(pid, tid, args, &ctls, &n_ctls))
 		return -1;
 
 	cs = get_cg_set(&ctls, n_ctls, item);
@@ -752,9 +787,10 @@ int dump_task_cgroup(struct pstree_item *item, u32 *cg_id, struct parasite_dump_
 		pr_info("Set %d is criu one\n", cs->id);
 	} else {
 		if (item == root_item) {
-			BUG_ON(root_cgset);
-			root_cgset = cs;
-			pr_info("Set %d is root one\n", cs->id);
+			if (!root_cgset) {
+				root_cgset = cs;
+				pr_info("Set %d is root one\n", cs->id);
+			}
 		} else {
 			struct cg_ctl *root, *stray;
 
@@ -901,6 +937,8 @@ static int dump_controllers(CgroupEntry *cg)
 	list_for_each_entry(cur, &cgroups, l) {
 		cg_controller_entry__init(ce);
 
+		ce->has_is_threaded = true;
+		ce->is_threaded = cur->is_threaded;
 		ce->cnames = cur->controllers;
 		ce->n_cnames = cur->n_controllers;
 		ce->n_dirs = cur->n_heads;
@@ -988,6 +1026,9 @@ int dump_cgroups(void)
 	CgroupEntry cg = CGROUP_ENTRY__INIT;
 	int ret = -1;
 
+	if (opts.unprivileged)
+		return 0;
+
 	BUG_ON(!criu_cgset || !root_cgset);
 
 	/*
@@ -1054,8 +1095,15 @@ static int ctrl_dir_and_opt(CgControllerEntry *ctl, char *dir, int ds, char *opt
  * it. We restore these properties as soon as the cgroup is created.
  */
 static const char *special_props[] = {
-	"cpuset.cpus",	     "cpuset.mems",	   "devices.list",	   "memory.kmem.limit_in_bytes",
-	"memory.swappiness", "memory.oom_control", "memory.use_hierarchy", NULL,
+	"cpuset.cpus",
+	"cpuset.mems",
+	"devices.list",
+	"memory.kmem.limit_in_bytes",
+	"memory.swappiness",
+	"memory.oom_control",
+	"memory.use_hierarchy",
+	"cgroup.type",
+	NULL,
 };
 
 bool is_special_property(const char *prop)
@@ -1161,17 +1209,12 @@ static int prepare_cgns(CgSetEntry *se)
 	return 0;
 }
 
-static int move_in_cgroup(CgSetEntry *se, bool setup_cgns)
+static int move_in_cgroup(CgSetEntry *se)
 {
 	int i;
 
 	pr_info("Move into %d\n", se->id);
 
-	if (setup_cgns && prepare_cgns(se) < 0) {
-		pr_err("failed preparing cgns\n");
-		return -1;
-	}
-
 	for (i = 0; i < se->n_ctls; i++) {
 		char aux[PATH_MAX];
 		int fd = -1, err, j, aux_off;
@@ -1211,7 +1254,44 @@ static int move_in_cgroup(CgSetEntry *se, bool setup_cgns)
 	return 0;
 }
 
-int prepare_task_cgroup(struct pstree_item *me)
+int prepare_cgroup_namespace(struct pstree_item *root_task)
+{
+	CgSetEntry *se;
+
+	if (opts.manage_cgroups == CG_MODE_IGNORE)
+		return 0;
+
+	if (root_task->parent) {
+		pr_err("Expecting root_task to restore cgroup namespace\n");
+		return -1;
+	}
+
+	/*
+	 * If all dumped tasks shared criu's cgset at dump time, no cgsets
+	 * (and hence no cgroup namespaces) were dumped, and we rely on the
+	 * caller of criu to prepare the proper cgset/cgns before restore.
+	 * With --unprivileged we do not even have the root cgset here.
+	 */
+	if (!rsti(root_task)->cg_set || rsti(root_task)->cg_set == root_cg_set) {
+		pr_info("Cgroup namespace inherited from parent\n");
+		return 0;
+	}
+
+	se = find_rst_set_by_id(rsti(root_task)->cg_set);
+	if (!se) {
+		pr_err("No set %d found\n", rsti(root_task)->cg_set);
+		return -1;
+	}
+
+	if (prepare_cgns(se) < 0) {
+		pr_err("failed preparing cgns\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+int restore_task_cgroup(struct pstree_item *me)
 {
 	struct pstree_item *parent = me->parent;
 	CgSetEntry *se;
@@ -1243,13 +1323,7 @@ int prepare_task_cgroup(struct pstree_item *me)
 		return -1;
 	}
 
-	/* Since don't support nesting of cgroup namespaces, let's only set up
-	 * the cgns (if it exists) in the init task. In the future, we should
-	 * just check that the cgns prefix string matches for all the entries
-	 * in the cgset, and only unshare if that's true.
-	 */
-
-	return move_in_cgroup(se, !me->parent);
+	return move_in_cgroup(se);
 }
 
 void fini_cgroup(void)
@@ -1268,39 +1342,75 @@ void fini_cgroup(void)
 	cg_yard = NULL;
 }
 
-static int restore_perms(int fd, const char *path, CgroupPerms *perms)
+static int add_subtree_control_prop_prefix(char *input, char *output, char prefix)
 {
-	struct stat sb;
+	char *current, *next;
+	size_t len, off = 0;
 
-	if (perms) {
-		if (fstat(fd, &sb) < 0) {
-			pr_perror("stat of property %s failed", path);
-			return -1;
-		}
+	current = input;
+	do {
+		next = strchrnul(current, ' ');
+		len = next - current;
 
-		/* only chmod/chown if the perms are actually different: we aren't
-		 * allowed to chmod some cgroup props (e.g. the read only ones), so we
-		 * don't want to try if the perms already match.
-		 */
-		if (sb.st_mode != (mode_t)perms->mode && fchmod(fd, perms->mode) < 0) {
-			pr_perror("chmod of %s failed", path);
-			return -1;
-		}
+		output[off] = prefix;
+		off++;
+		memcpy(output + off, current, len);
+		off += len;
+		output[off] = ' ';
+		off++;
 
-		if ((sb.st_uid != perms->uid || sb.st_gid != perms->gid) && fchown(fd, perms->uid, perms->gid)) {
-			pr_perror("chown of %s failed", path);
-			return -1;
-		}
+		current = next + 1;
+	} while (*next != '\0');
+
+	return off;
+}
+
+static int restore_cgroup_subtree_control(const CgroupPropEntry *cg_prop_entry_p, int fd)
+{
+	char buf[1024];
+	char line[1024];
+	int ret, off = 0;
+
+	ret = read(fd, buf, sizeof(buf) - 1);
+	if (ret < 0) {
+		pr_perror("read from cgroup.subtree_control");
+		return ret;
+	}
+	/* NUL-terminate; drop the trailing newline so it can't stick to the last name */
+	buf[(ret > 0 && buf[ret - 1] == '\n') ? ret - 1 : ret] = '\0';
+
+	/* Emit "-name" for every subsystem currently enabled in subtree_control */
+	if (buf[0] != '\0')
+		off = add_subtree_control_prop_prefix(buf, line, '-');
+
+	/* Emit "+name" for every subsystem that has to be enabled on restore */
+	if (cg_prop_entry_p->value[0] != '\0')
+		off += add_subtree_control_prop_prefix(cg_prop_entry_p->value, line + off, '+');
+
+	/* Each emitted token is followed by a space; remove the last one */
+	if (off != 0) {
+		off--;
+		line[off] = '\0';
+	}
+
+	if (write(fd, line, off) != off) {
+		pr_perror("write to cgroup.subtree_control");
+		return -1;
 	}
 
 	return 0;
 }
 
+/*
+ * Note: The path string can be modified in this function,
+ * the length of path string should be at least PATH_MAX.
+ */
 static int restore_cgroup_prop(const CgroupPropEntry *cg_prop_entry_p, char *path, int off, bool split_lines,
 			       bool skip_fails)
 {
-	int cg, fd, ret = -1;
+	int cg, fd, exit_code = -1, flag;
 	CgroupPerms *perms = cg_prop_entry_p->perms;
+	int is_subtree_control = !strcmp(cg_prop_entry_p->name, "cgroup.subtree_control");
 
 	if (opts.manage_cgroups == CG_MODE_IGNORE)
 		return 0;
@@ -1317,19 +1427,35 @@ static int restore_cgroup_prop(const CgroupPropEntry *cg_prop_entry_p, char *pat
 
 	pr_info("Restoring cgroup property value [%s] to [%s]\n", cg_prop_entry_p->value, path);
 
+	if (is_subtree_control)
+		flag = O_RDWR;
+	else
+		flag = O_WRONLY;
+
 	cg = get_service_fd(CGROUP_YARD);
-	fd = openat(cg, path, O_WRONLY);
+	fd = openat(cg, path, flag);
 	if (fd < 0) {
 		pr_perror("bad cgroup path: %s", path);
 		return -1;
 	}
 
-	if (restore_perms(fd, path, perms) < 0)
+	if (perms && cr_fchperm(fd, perms->uid, perms->gid, perms->mode) < 0)
 		goto out;
 
 	/* skip these two since restoring their values doesn't make sense */
 	if (!strcmp(cg_prop_entry_p->name, "cgroup.procs") || !strcmp(cg_prop_entry_p->name, "tasks")) {
-		ret = 0;
+		exit_code = 0;
+		goto out;
+	}
+
+	if (is_subtree_control) {
+		exit_code = restore_cgroup_subtree_control(cg_prop_entry_p, fd);
+		goto out;
+	}
+
+	/* skip restoring cgroup.type if its value is not "threaded" */
+	if (!strcmp(cg_prop_entry_p->name, "cgroup.type") && strcmp(cg_prop_entry_p->value, "threaded")) {
+		exit_code = 0;
 		goto out;
 	}
 
@@ -1351,21 +1477,28 @@ static int restore_cgroup_prop(const CgroupPropEntry *cg_prop_entry_p, char *pat
 		} while (*next_line != '\0');
 	} else {
 		size_t len = strlen(cg_prop_entry_p->value);
+		int ret;
 
-		if (write(fd, cg_prop_entry_p->value, len) != len) {
+		ret = write(fd, cg_prop_entry_p->value, len);
+		/* memory.kmem.limit_in_bytes has been deprecated. Look at
+		 * 58056f77502f3 ("memcg, kmem: further deprecate
+		 * kmem.limit_in_bytes") for more details. */
+		if (ret == -1 && errno == EOPNOTSUPP &&
+		    !strcmp(cg_prop_entry_p->name, "memory.kmem.limit_in_bytes"))
+			ret = len;
+		if (ret != len) {
 			pr_perror("Failed writing %s to %s", cg_prop_entry_p->value, path);
 			if (!skip_fails)
 				goto out;
 		}
 	}
 
-	ret = 0;
-
+	exit_code = 0;
 out:
 	if (close(fd) != 0)
 		pr_perror("Failed closing %s", path);
 
-	return ret;
+	return exit_code;
 }
 
 static CgroupPropEntry *freezer_state_entry;
@@ -1630,7 +1763,7 @@ static int restore_special_props(char *paux, size_t off, CgroupDirEntry *e)
 
 static int prepare_dir_perms(int cg, char *path, CgroupPerms *perms)
 {
-	int fd, ret;
+	int fd, ret = 0;
 
 	fd = openat(cg, path, O_DIRECTORY);
 	if (fd < 0) {
@@ -1638,7 +1771,8 @@ static int prepare_dir_perms(int cg, char *path, CgroupPerms *perms)
 		return -1;
 	}
 
-	ret = restore_perms(fd, path, perms);
+	if (perms)
+		ret = cr_fchperm(fd, perms->uid, perms->gid, perms->mode);
 	close(fd);
 	return ret;
 }
@@ -1677,12 +1811,9 @@ static int prepare_cgroup_dirs(char **controllers, int n_controllers, char *paux
 				return -1;
 
 			for (j = 0; j < n_controllers; j++) {
-				if (!strcmp(controllers[j], "cpuset") || !strcmp(controllers[j], "memory") ||
-				    !strcmp(controllers[j], "devices")) {
-					if (restore_special_props(paux, off2, e) < 0) {
-						pr_err("Restoring special cpuset props failed!\n");
-						return -1;
-					}
+				if (restore_special_props(paux, off2, e) < 0) {
+					pr_err("Restoring special cpuset props failed!\n");
+					return -1;
 				}
 			}
 		} else {
@@ -1796,7 +1927,7 @@ static int prepare_cgroup_sfd(CgroupEntry *ce)
 			if (ctrl->cnames[0][0] == 0)
 				fstype = "cgroup2";
 
-			pr_debug("\tMaking controller dir %s (%s)\n", paux, opt);
+			pr_debug("\tMaking controller dir %s (%s), type %s\n", paux, opt, fstype);
 			if (mkdir(paux, 0700)) {
 				pr_perror("\tCan't make controller dir %s", paux);
 				return -1;
@@ -1820,6 +1951,161 @@ static int prepare_cgroup_sfd(CgroupEntry *ce)
 	return 0;
 }
 
+static int cgroupd_unblock_sigterm(void)
+{
+	sigset_t unblockmask;
+
+	sigemptyset(&unblockmask);
+	sigaddset(&unblockmask, SIGTERM);
+
+	if (sigprocmask(SIG_UNBLOCK, &unblockmask, NULL)) {
+		pr_perror("cgroupd: can't unblock SIGTERM");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * If a thread is in a different cgroup set than the main thread of its
+ * process, it means it is in a threaded controller. This daemon receives
+ * the cg_set number from the restored thread and moves this thread to the
+ * correct cgroup controllers.
+ */
+static int cgroupd(int sk)
+{
+	/*
+	 * This pairs with SIGTERM in stop_cgroupd(), and ensures that cgroupd
+	 * receives the termination signal, regardless of which signal block
+	 * mask was inherited.
+	 */
+	if (cgroupd_unblock_sigterm())
+		return -1;
+
+	pr_info("cgroupd: Daemon started\n");
+
+	while (1) {
+		struct unsc_msg um;
+		uns_call_t call;
+		pid_t tid;
+		int fd, cg_set, i;
+		CgSetEntry *cg_set_entry;
+		int ret;
+
+		unsc_msg_init(&um, &call, &cg_set, NULL, 0, 0, NULL);
+		ret = recvmsg(sk, &um.h, 0);
+		if (ret <= 0) {
+			pr_perror("cgroupd: recv req error");
+			return -1;
+		}
+
+		unsc_msg_pid_fd(&um, &tid, &fd);
+		pr_debug("cgroupd: move process %d into cg_set %d\n", tid, cg_set);
+
+		cg_set_entry = find_rst_set_by_id(cg_set);
+		if (!cg_set_entry) {
+			pr_err("cgroupd: No set found %d\n", cg_set);
+			return -1;
+		}
+
+		for (i = 0; i < cg_set_entry->n_ctls; i++) {
+			int j, aux_off;
+			CgMemberEntry *ce = cg_set_entry->ctls[i];
+			char aux[PATH_MAX];
+			CgControllerEntry *ctrl = NULL;
+			const char *format;
+
+			for (j = 0; j < n_controllers; j++) {
+				CgControllerEntry *cur = controllers[j];
+				if (cgroup_contains(cur->cnames, cur->n_cnames, ce->name, NULL)) {
+					ctrl = cur;
+					break;
+				}
+			}
+
+			if (!ctrl) {
+				pr_err("cgroupd: No cg_controller_entry found for %s/%s\n", ce->name, ce->path);
+				return -1;
+			}
+
+			/*
+			 * This is not a threaded controller, all threads in this
+			 * process must be in this controller. Main thread has been
+			 * restored, so this thread is in this controller already.
+			 */
+			if (!ctrl->has_is_threaded || !ctrl->is_threaded)
+				continue;
+
+			aux_off = ctrl_dir_and_opt(ctrl, aux, sizeof(aux), NULL, 0);
+			format = ctrl->cnames[0][0] ? "/%s/tasks" : "/%s/cgroup.threads";
+			snprintf(aux + aux_off, sizeof(aux) - aux_off, format, ce->path);
+
+			/*
+			 * Cgroupd runs outside of the namespaces so we don't
+			 * need to use userns_call here
+			 */
+			if (userns_move(aux, 0, tid)) {
+				pr_err("cgroupd: Can't move thread %d into %s/%s\n", tid, ce->name, ce->path);
+				return -1;
+			}
+		}
+
+		/*
+		 * We only want to send the cred which contains thread id back.
+		 * The restored thread recvmsg(MSG_PEEK) until it gets its own
+		 * thread id.
+		 */
+		unsc_msg_init(&um, &call, &cg_set, NULL, 0, 0, &tid);
+		if (sendmsg(sk, &um.h, 0) <= 0) {
+			pr_perror("cgroupd: send req error");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+int stop_cgroupd(void)
+{
+	if (cgroupd_pid) {
+		sigset_t blockmask, oldmask;
+
+		/*
+		 * Block the SIGCHLD signal to avoid triggering
+		 * sigchld_handler()
+		 */
+		sigemptyset(&blockmask);
+		sigaddset(&blockmask, SIGCHLD);
+		sigprocmask(SIG_BLOCK, &blockmask, &oldmask);
+
+		kill(cgroupd_pid, SIGTERM);
+		waitpid(cgroupd_pid, NULL, 0);
+
+		sigprocmask(SIG_SETMASK, &oldmask, NULL);
+	}
+
+	return 0;
+}
+
+static int prepare_cgroup_thread_sfd(void)
+{
+	int sk;
+
+	sk = start_unix_cred_daemon(&cgroupd_pid, cgroupd);
+	if (sk < 0) {
+		pr_err("failed to start cgroupd\n");
+		return -1;
+	}
+
+	if (install_service_fd(CGROUPD_SK, sk) < 0) {
+		kill(cgroupd_pid, SIGKILL);
+		waitpid(cgroupd_pid, NULL, 0);
+		return -1;
+	}
+
+	return 0;
+}
+
 static int rewrite_cgsets(CgroupEntry *cge, char **controllers, int n_controllers, char **dir_name, char *newroot)
 {
 	size_t dirlen = strlen(*dir_name);
@@ -1974,15 +2260,19 @@ int prepare_cgroup(void)
 	n_controllers = ce->n_controllers;
 	controllers = ce->controllers;
 
-	if (n_sets)
+	if (n_sets) {
 		/*
 		 * We rely on the fact that all sets contain the same
 		 * set of controllers. This is checked during dump
 		 * with cg_set_compare(CGCMP_ISSUB) call.
 		 */
 		ret = prepare_cgroup_sfd(ce);
-	else
+		if (ret < 0)
+			return ret;
+		ret = prepare_cgroup_thread_sfd();
+	} else {
 		ret = 0;
+	}
 
 	return ret;
 }
diff --git a/criu/config.c b/criu/config.c
index 14a11f9c3..d7ef3f8e8 100644
--- a/criu/config.c
+++ b/criu/config.c
@@ -18,6 +18,7 @@
 #include "cr_options.h"
 #include "filesystems.h"
 #include "file-lock.h"
+#include "image.h"
 #include "irmap.h"
 #include "mount.h"
 #include "mount-v2.h"
@@ -430,6 +431,7 @@ void init_opts(void)
 	opts.pre_dump_mode = PRE_DUMP_SPLICE;
 	opts.file_validation_method = FILE_VALIDATION_DEFAULT;
 	opts.network_lock_method = NETWORK_LOCK_DEFAULT;
+	opts.ghost_fiemap = FIEMAP_DEFAULT;
 }
 
 bool deprecated_ok(char *what)
@@ -696,14 +698,21 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
 		{ "cgroup-yard", required_argument, 0, 1096 },
 		{ "pre-dump-mode", required_argument, 0, 1097 },
 		{ "file-validation", required_argument, 0, 1098 },
+		BOOL_OPT("skip-file-rwx-check", &opts.skip_file_rwx_check),
 		{ "lsm-mount-context", required_argument, 0, 1099 },
 		{ "network-lock", required_argument, 0, 1100 },
 		BOOL_OPT("mntns-compat-mode", &opts.mntns_compat_mode),
+		BOOL_OPT("unprivileged", &opts.unprivileged),
+		BOOL_OPT("ghost-fiemap", &opts.ghost_fiemap),
+		BOOL_OPT(OPT_ALLOW_UPROBES, &opts.allow_uprobes),
 		{},
 	};
 
 #undef BOOL_OPT
 
+	if (argv && argv[0])
+		SET_CHAR_OPTS(argv_0, argv[0]);
+
 	ret = pre_parse(argc, argv, usage_error, &no_default_config, &cfg_file);
 
 	if (ret)
@@ -1029,6 +1038,8 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
 				opts.network_lock_method = NETWORK_LOCK_IPTABLES;
 			} else if (!strcmp("nftables", optarg)) {
 				opts.network_lock_method = NETWORK_LOCK_NFTABLES;
+			} else if (!strcmp("skip", optarg) || !strcmp("none", optarg)) {
+				opts.network_lock_method = NETWORK_LOCK_SKIP;
 			} else {
 				pr_err("Invalid value for --network-lock: %s\n", optarg);
 				return 1;
@@ -1115,6 +1126,11 @@ int check_options(void)
 		}
 	}
 
+	if (opts.track_mem && !kdat.has_dirty_track) {
+		pr_err("Tracking memory is not available. Consider omitting --track-mem option.\n");
+		return 1;
+	}
+
 	if (check_namespace_opts()) {
 		pr_err("Error: namespace flags conflict\n");
 		return 1;
diff --git a/criu/cr-check.c b/criu/cr-check.c
index f589a91da..7c3dc76dd 100644
--- a/criu/cr-check.c
+++ b/criu/cr-check.c
@@ -21,7 +21,8 @@
 #include <sys/prctl.h>
 #include <sched.h>
 #include <sys/mount.h>
-#include <linux/aio_abi.h>
+#include <sys/utsname.h>
+#include <sys/stat.h>
 
 #include "../soccr/soccr.h"
 
@@ -30,7 +31,7 @@
 #include "sockets.h"
 #include "crtools.h"
 #include "log.h"
-#include "util-pie.h"
+#include "util-caps.h"
 #include "prctl.h"
 #include "files.h"
 #include "sk-inet.h"
@@ -52,6 +53,8 @@
 #include "net.h"
 #include "restorer.h"
 #include "uffd.h"
+#include "linux/aio_abi.h"
+#include "mount-v2.h"
 
 #include "images/inventory.pb-c.h"
 
@@ -104,7 +107,7 @@ out:
 
 static int check_apparmor_stacking(void)
 {
-	if (!check_aa_ns_dumping())
+	if (!kdat.apparmor_ns_dumping_enabled)
 		return -1;
 
 	return 0;
@@ -515,6 +518,14 @@ static int check_ipc(void)
 {
 	int ret;
 
+	/*
+	 * Since kernel 5.16 sem_next_id can be accessed via CAP_CHECKPOINT_RESTORE, however
+	 * for non-root users access() runs with an empty set of caps and will therefore always
+	 * fail.
+	 */
+	if (opts.uid)
+		return 0;
+
 	ret = access("/proc/sys/kernel/sem_next_id", R_OK | W_OK);
 	if (!ret)
 		return 0;
@@ -1039,10 +1050,14 @@ static int check_tcp(void)
 	}
 
 	val = 1;
-	ret = setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val));
-	if (ret < 0) {
-		pr_perror("Can't turn TCP repair mode ON");
-		goto out;
+	if (!opts.unprivileged || has_cap_net_admin(opts.cap_eff)) {
+		ret = setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val));
+		if (ret < 0) {
+			pr_perror("Can't turn TCP repair mode ON");
+			goto out;
+		}
+	} else {
+		pr_info("Not checking for TCP repair mode. Please set CAP_NET_ADMIN\n");
 	}
 
 	optlen = sizeof(val);
@@ -1073,6 +1088,8 @@ static int kerndat_tcp_repair_window(void)
 	int sk, val = 1;
 
 	sk = socket(AF_INET, SOCK_STREAM, 0);
+	if (sk < 0 && errno == EAFNOSUPPORT)
+		sk = socket(AF_INET6, SOCK_STREAM, 0);
 	if (sk < 0) {
 		pr_perror("Unable to create inet socket");
 		goto errn;
@@ -1180,7 +1197,7 @@ static int check_ipt_legacy(void)
 	char *ipt_legacy_bin;
 	char *ip6t_legacy_bin;
 
-	ipt_legacy_bin = get_legacy_iptables_bin(false);
+	ipt_legacy_bin = get_legacy_iptables_bin(false, false);
 	if (!ipt_legacy_bin) {
 		pr_warn("Couldn't find iptables version which is using iptables legacy API\n");
 		return -1;
@@ -1191,7 +1208,7 @@ static int check_ipt_legacy(void)
 	if (!kdat.ipv6)
 		return 0;
 
-	ip6t_legacy_bin = get_legacy_iptables_bin(true);
+	ip6t_legacy_bin = get_legacy_iptables_bin(true, false);
 	if (!ip6t_legacy_bin) {
 		pr_warn("Couldn't find ip6tables version which is using iptables legacy API\n");
 		return -1;
@@ -1311,9 +1328,6 @@ static int check_pidfd_store(void)
 
 static int check_ns_pid(void)
 {
-	if (kerndat_has_nspid() < 0)
-		return -1;
-
 	if (!kdat.has_nspid)
 		return -1;
 
@@ -1362,6 +1376,236 @@ static int check_openat2(void)
 	return 0;
 }
 
+static int check_ipv6_freebind(void)
+{
+	if (!kdat.has_ipv6_freebind)
+		return -1;
+
+	return 0;
+}
+
+static int check_pagemap_scan(void)
+{
+	if (!kdat.has_pagemap_scan)
+		return -1;
+
+	return 0;
+}
+
+static int check_timer_cr_ids(void)
+{
+	if (!kdat.has_timer_cr_ids)
+		return -1;
+
+	return 0;
+}
+
+/* musl doesn't have a statx wrapper... */
+struct staty {
+	__u32 stx_dev_major;
+	__u32 stx_dev_minor;
+	__u64 stx_ino;
+};
+
+static long get_file_dev_and_inode(void *addr, struct staty *stx)
+{
+	FILE *mapf = fopen("/proc/self/maps", "r");
+	char buf[4096];
+	long ret = -1;
+
+	if (mapf == NULL) {
+		pr_perror("fopen(/proc/self/maps)");
+		return -1;
+	}
+	while (fgets(buf, sizeof(buf), mapf)) {
+		unsigned long start, end;
+		uint32_t maj, min;
+		__u64 ino;
+
+		if (sscanf(buf, "%lx-%lx %*s %*s %x:%x %llu", &start, &end, &maj, &min, &ino) != 5) {
+			pr_perror("Unable to parse: %s", buf);
+			goto out;
+		}
+		if (start == (unsigned long)addr) {
+			stx->stx_dev_major = maj;
+			stx->stx_dev_minor = min;
+			stx->stx_ino = ino;
+			ret = 0;
+			goto out;
+		}
+	}
+
+	pr_err("Unable to find the mapping\n");
+out:
+	fclose(mapf); /* single exit: the stream is released on every path */
+	return ret;
+}
+
+static int ovl_mount(void)
+{
+	int tmpfs, fsfd, ovl;
+
+	fsfd = cr_fsopen("tmpfs", 0);
+	if (fsfd == -1) {
+		pr_perror("Unable to fsopen tmpfs");
+		return -1;
+	}
+
+	if (cr_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) == -1) {
+		pr_perror("Unable to create tmpfs mount");
+		return -1;
+	}
+
+	tmpfs = cr_fsmount(fsfd, 0, 0);
+	if (tmpfs == -1) {
+		pr_perror("Unable to mount tmpfs");
+		return -1;
+	}
+
+	close(fsfd);
+
+	/* overlayfs can't be constructed on top of a detached mount. */
+	if (sys_move_mount(tmpfs, "", AT_FDCWD, "/tmp", MOVE_MOUNT_F_EMPTY_PATH)) {
+		pr_perror("Unable to attach tmpfs mount");
+		return -1;
+	}
+	close(tmpfs);
+
+	if (chdir("/tmp")) {
+		pr_perror("Unable to change working directory");
+		return -1;
+	}
+
+	if (mkdir("/tmp/w", 0755) == -1 ||
+	    mkdir("/tmp/u", 0755) == -1 ||
+	    mkdir("/tmp/l", 0755) == -1) {
+		pr_perror("mkdir");
+		return -1;
+	}
+
+	fsfd = cr_fsopen("overlay", 0);
+	if (fsfd == -1) {
+		pr_perror("Unable to fsopen overlayfs");
+		return -1;
+	}
+	if (cr_fsconfig(fsfd, FSCONFIG_SET_STRING, "source", "test", 0) == -1 ||
+	    cr_fsconfig(fsfd, FSCONFIG_SET_STRING, "lowerdir", "/tmp/l", 0) == -1 ||
+	    cr_fsconfig(fsfd, FSCONFIG_SET_STRING, "upperdir", "/tmp/u", 0) == -1 ||
+	    cr_fsconfig(fsfd, FSCONFIG_SET_STRING, "workdir", "/tmp/w", 0) == -1) {
+		pr_perror("Unable to configure overlayfs");
+		return -1;
+	}
+	if (cr_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) == -1) {
+		pr_perror("Unable to create overlayfs");
+		return -1;
+	}
+	ovl = cr_fsmount(fsfd, 0, 0);
+	if (ovl == -1) {
+		pr_perror("Unable to mount overlayfs");
+		return -1;
+	}
+
+	return ovl;
+}
+
+/*
+ * Check that the file device and inode shown in /proc/pid/maps match values
+ * returned by stat(2).
+ */
+static int do_check_overlayfs_maps(void)
+{
+	struct staty stx, mstx;
+	struct stat st;
+	int ovl, fd;
+	void *addr;
+
+	/* Create a new mount namespace to not care about cleaning test mounts. */
+	if (unshare(CLONE_NEWNS) == -1) {
+		pr_warn("Unable to create a new mount namespace\n");
+		return 0;
+	}
+
+	if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) == -1) {
+		pr_perror("Unable to remount / with MS_SLAVE");
+		return -1;
+	}
+
+	ovl = ovl_mount();
+	if (ovl == -1)
+		return -1;
+
+	fd = openat(ovl, "test", O_RDWR | O_CREAT, 0644);
+	if (fd == -1) {
+		pr_perror("Unable to open a test file");
+		return -1;
+	}
+
+	addr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0);
+	if (addr == MAP_FAILED) {
+		pr_perror("Unable to map the test file");
+		return -1;
+	}
+
+	if (get_file_dev_and_inode(addr, &mstx))
+		return -1;
+	if (fstat(fd, &st)) {
+		pr_perror("stat");
+		return -1;
+	}
+	stx.stx_dev_major = major(st.st_dev);
+	stx.stx_dev_minor = minor(st.st_dev);
+	stx.stx_ino = st.st_ino;
+
+	if (stx.stx_dev_major != mstx.stx_dev_major ||
+	    stx.stx_dev_minor != mstx.stx_dev_minor ||
+	    stx.stx_ino != mstx.stx_ino) {
+		pr_err("unmatched dev:ino %x:%x:%llx (expected %x:%x:%llx)\n",
+		       mstx.stx_dev_major, mstx.stx_dev_minor, mstx.stx_ino,
+		       stx.stx_dev_major, stx.stx_dev_minor, stx.stx_ino);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int check_overlayfs_maps(void)
+{
+	pid_t pid;
+	int status;
+
+	pid = fork();
+	if (pid == -1) {
+		pr_perror("Unable to fork a child");
+		return -1;
+	}
+	if (pid == 0) {
+		if (do_check_overlayfs_maps())
+			exit(1);
+		exit(0);
+	}
+	if (waitpid(pid, &status, 0) == -1) {
+		pr_perror("waitpid");
+		return -1;
+	}
+	return status == 0 ? 0 : -1;
+}
+
+static int check_breakpoints(void)
+{
+	if (!kdat.has_breakpoints) {
+		pr_warn("Hardware breakpoints don't seem to work\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int check_pagemap_scan_guard_pages(void)
+{
+	kerndat_warn_about_madv_guards();
+
+	return kdat.has_pagemap_scan_guard_pages ? 0 : -1;
+}
+
 static int (*chk_feature)(void);
 
 /*
@@ -1389,14 +1633,12 @@ static int (*chk_feature)(void);
 			return ret;                 \
 		}                                   \
 	} while (0)
+
 int cr_check(void)
 {
 	struct ns_id *ns;
 	int ret = 0;
 
-	if (!is_root_user())
-		return -1;
-
 	root_item = alloc_pstree_item();
 	if (root_item == NULL)
 		return -1;
@@ -1478,13 +1720,20 @@ int cr_check(void)
 		ret |= check_newifindex();
 		ret |= check_pidfd_store();
 		ret |= check_ns_pid();
-		ret |= check_apparmor_stacking();
 		ret |= check_network_lock_nftables();
 		ret |= check_sockopt_buf_lock();
 		ret |= check_memfd_hugetlb();
 		ret |= check_move_mount_set_group();
 		ret |= check_openat2();
 		ret |= check_ptrace_get_rseq_conf();
+		ret |= check_ipv6_freebind();
+		ret |= check_pagemap_scan();
+		ret |= check_overlayfs_maps();
+		ret |= check_timer_cr_ids();
+		ret |= check_pagemap_scan_guard_pages();
+
+		if (kdat.lsm == LSMTYPE__APPARMOR)
+			ret |= check_apparmor_stacking();
 	}
 
 	/*
@@ -1494,6 +1743,10 @@ int cr_check(void)
 		ret |= check_autofs();
 		ret |= check_compat_cr();
 	}
+	/*
+	 * Category 4 - optional.
+	 */
+	check_breakpoints();
 
 	pr_msg("%s\n", ret ? CHECK_MAYBE : CHECK_GOOD);
 	return ret;
@@ -1602,6 +1855,12 @@ static struct feature_list feature_list[] = {
 	{ "move_mount_set_group", check_move_mount_set_group },
 	{ "openat2", check_openat2 },
 	{ "get_rseq_conf", check_ptrace_get_rseq_conf },
+	{ "ipv6_freebind", check_ipv6_freebind },
+	{ "pagemap_scan", check_pagemap_scan },
+	{ "timer_cr_ids", check_timer_cr_ids },
+	{ "overlayfs_maps", check_overlayfs_maps },
+	{ "breakpoints", check_breakpoints },
+	{ "pagemap_scan_guard_pages", check_pagemap_scan_guard_pages },
 	{ NULL, NULL },
 };
 
@@ -1653,3 +1912,54 @@ static char *feature_name(int (*func)(void))
 	}
 	return NULL;
 }
+
+static int pr_set_dumpable(int value)
+{
+	int ret = prctl(PR_SET_DUMPABLE, value, 0, 0, 0);
+	if (ret < 0)
+		pr_perror("Unable to set PR_SET_DUMPABLE");
+	return ret;
+}
+
+int check_caps(void)
+{
+	/* Read out effective capabilities and store in opts.cap_eff. */
+	if (set_opts_cap_eff())
+		goto out;
+
+	/*
+	 * No matter whether running as root or not, CRIU always needs
+	 * at least these capabilities.
+	 */
+	if (!has_cap_checkpoint_restore(opts.cap_eff))
+		goto out;
+
+	/* For some things we need to know if we are running as root. */
+	opts.uid = geteuid();
+
+	if (!opts.uid) {
+		/* CRIU is running as root. No further checks are necessary. */
+		return 0;
+	}
+
+	if (!opts.unprivileged) {
+		pr_msg("Running as non-root requires '--unprivileged'\n");
+		pr_msg("Please consult the documentation for limitations when running as non-root\n");
+		return -1;
+	}
+
+	/*
+	 * At this point we know we are running as non-root with the necessary
+	 * capabilities available. Now we have to make the process dumpable
+	 * so that /proc/self is not owned by root.
+	 */
+	if (pr_set_dumpable(1))
+		return -1;
+
+	return 0;
+out:
+	pr_msg("CRIU needs to have the CAP_SYS_ADMIN or the CAP_CHECKPOINT_RESTORE capability: \n");
+	pr_msg("setcap cap_checkpoint_restore+eip %s\n", opts.argv_0);
+
+	return -1;
+}
diff --git a/criu/cr-dedup.c b/criu/cr-dedup.c
index c0c21f53e..feeb9ebb0 100644
--- a/criu/cr-dedup.c
+++ b/criu/cr-dedup.c
@@ -87,7 +87,8 @@ static int cr_dedup_one_pagemap(unsigned long img_id, int flags)
 		if (ret <= 0)
 			goto exit;
 
-		pr_debug("dedup iovec base=%" PRIx64 ", len=%lu\n", pr.pe->vaddr, pagemap_len(pr.pe));
+		pr_debug("dedup iovec %" PRIx64 " - %" PRIx64 "\n",
+			 pr.pe->vaddr, pr.pe->vaddr + pagemap_len(pr.pe));
 		if (!pagemap_in_parent(pr.pe)) {
 			ret = dedup_one_iovec(prp, pr.pe->vaddr, pagemap_len(pr.pe));
 			if (ret)
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index f58701e5c..a58aaf34a 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -86,6 +86,8 @@
 #include "pidfd-store.h"
 #include "apparmor.h"
 #include "asm/dump.h"
+#include "timer.h"
+#include "sigact.h"
 
 /*
  * Architectures can overwrite this function to restore register sets that
@@ -128,6 +130,23 @@ int collect_mappings(pid_t pid, struct vm_area_list *vma_area_list, dump_filemap
 	if (ret < 0)
 		goto err;
 
+	/*
+	 * In addition to real process VMAs we should keep an info about
+	 * madvise(MADV_GUARD_INSTALL) pages. While these are not represented
+	 * as a struct vm_area_struct in the kernel, it is convenient to treat
+	 * them as mappings in CRIU and reuse the same VMA images but with only
+	 * VMA_AREA_GUARD flag set.
+	 *
+	 * Also, we don't need to dump them during pre-dump.
+	 */
+	if (dump_file) {
+		ret = collect_madv_guards(pid, vma_area_list);
+		if (ret < 0) {
+			pr_err("Collect MADV_GUARD_INSTALL pages (pid: %d) failed with %d\n", pid, ret);
+			goto err;
+		}
+	}
+
 	pr_info("Collected, longest area occupies %lu pages\n", vma_area_list->nr_priv_pages_longest);
 	pr_info_vma_list(&vma_area_list->h);
 
@@ -157,6 +176,11 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc)
 	tc->has_sched_policy = true;
 	tc->sched_policy = ret;
 
+	/* The reset-on-fork flag might be used in combination
+	 * with SCHED_FIFO or SCHED_RR to reset the scheduling
+	 * policy/priority in child processes.
+	 */
+	ret &= ~SCHED_RESET_ON_FORK;
 	if ((ret == SCHED_RR) || (ret == SCHED_FIFO)) {
 		ret = syscall(__NR_sched_getparam, pid, &sp);
 		if (ret < 0) {
@@ -429,7 +453,7 @@ static int dump_filemap(struct vma_area *vma_area, int fd)
 	if (vma_area->aufs_rpath) {
 		struct fd_link aufs_link;
 
-		strlcpy(aufs_link.name, vma_area->aufs_rpath, sizeof(aufs_link.name));
+		__strlcpy(aufs_link.name, vma_area->aufs_rpath, sizeof(aufs_link.name));
 		aufs_link.len = strlen(aufs_link.name);
 		p.link = &aufs_link;
 	}
@@ -759,6 +783,7 @@ static int dump_task_core_all(struct parasite_ctl *ctl, struct pstree_item *item
 	pid_t pid = item->pid->real;
 	int ret = -1;
 	struct parasite_dump_cgroup_args cgroup_args, *info = NULL;
+	u32 *cg_set;
 
 	BUILD_BUG_ON(sizeof(cgroup_args) < PARASITE_ARG_SIZE_MIN);
 
@@ -769,11 +794,16 @@ static int dump_task_core_all(struct parasite_ctl *ctl, struct pstree_item *item
 	core->tc->child_subreaper = misc->child_subreaper;
 	core->tc->has_child_subreaper = true;
 
+	if (misc->membarrier_registration_mask) {
+		core->tc->membarrier_registration_mask = misc->membarrier_registration_mask;
+		core->tc->has_membarrier_registration_mask = true;
+	}
+
 	ret = get_task_personality(pid, &core->tc->personality);
 	if (ret < 0)
 		goto err;
 
-	strlcpy((char *)core->tc->comm, stat->comm, TASK_COMM_LEN);
+	__strlcpy((char *)core->tc->comm, stat->comm, TASK_COMM_LEN);
 	core->tc->flags = stat->flags;
 	core->tc->task_state = item->pid->state;
 	core->tc->exit_code = 0;
@@ -781,6 +811,11 @@ static int dump_task_core_all(struct parasite_ctl *ctl, struct pstree_item *item
 	core->thread_core->creds->lsm_profile = dmpi(item)->thread_lsms[0]->profile;
 	core->thread_core->creds->lsm_sockcreate = dmpi(item)->thread_lsms[0]->sockcreate;
 
+	if (core->tc->task_state == TASK_STOPPED) {
+		core->tc->has_stop_signo = true;
+		core->tc->stop_signo = item->pid->stop_signo;
+	}
+
 	ret = parasite_dump_thread_leader_seized(ctl, pid, core);
 	if (ret)
 		goto err;
@@ -799,13 +834,15 @@ static int dump_task_core_all(struct parasite_ctl *ctl, struct pstree_item *item
 	 */
 	if (item->ids->has_cgroup_ns_id && !item->parent) {
 		info = &cgroup_args;
+		strcpy(cgroup_args.thread_cgrp, "self/cgroup");
 		ret = parasite_dump_cgroup(ctl, &cgroup_args);
 		if (ret)
 			goto err;
 	}
 
-	core->tc->has_cg_set = true;
-	ret = dump_task_cgroup(item, &core->tc->cg_set, info);
+	core->thread_core->has_cg_set = true;
+	cg_set = &core->thread_core->cg_set;
+	ret = dump_thread_cgroup(item, cg_set, info, -1);
 	if (ret)
 		goto err;
 
@@ -867,6 +904,72 @@ static int collect_file_locks(void)
 	return parse_file_locks();
 }
 
+/* Check whether addr lies in [start_ip, start_ip + post_commit_offset) of rseq_cs. */
+static bool task_in_rseq(struct criu_rseq_cs *rseq_cs, uint64_t addr)
+{
+	return addr >= rseq_cs->start_ip && addr < rseq_cs->start_ip + rseq_cs->post_commit_offset;
+}
+
+/*
+ * Move thread i of item to its rseq abort handler when it was stopped
+ * inside an rseq critical section that has to be restarted.
+ *
+ * Returns 0 on success (also when there is nothing to do) and -1 when
+ * the rseq_cs descriptor has an unsupported ABI version.
+ */
+static int fixup_thread_rseq(const struct pstree_item *item, int i)
+{
+	CoreEntry *core = item->core[i];
+	struct criu_rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
+	pid_t tid = item->threads[i].real;
+
+	if (!kdat.has_ptrace_get_rseq_conf)
+		return 0;
+
+	/* equivalent to (struct rseq)->rseq_cs is NULL */
+	if (!rseq_cs->start_ip)
+		return 0;
+
+	pr_debug(
+		"fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n",
+		tid, rseq_cs->start_ip, rseq_cs->abort_ip, rseq_cs->post_commit_offset, rseq_cs->flags,
+		rseq_cs->version, (unsigned long)TI_IP(core));
+
+	if (rseq_cs->version != 0) {
+		pr_err("unsupported RSEQ ABI version = %d\n", rseq_cs->version);
+		return -1;
+	}
+
+	if (!task_in_rseq(rseq_cs, TI_IP(core)))
+		return 0;
+
+	/*
+	 * The thread stopped inside an rseq critical section, so its IP must
+	 * be moved to the rseq abort handler unless the section is marked
+	 * RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL (see the struct rseq flags
+	 * description). Checking the flag alone is not sufficient, because the
+	 * kernel clears (struct rseq)->rseq_cs in userspace memory; its original
+	 * value is therefore kept in the image and put back right before the
+	 * threads are released (see restore_rseq_cs()).
+	 *
+	 * The IP is fixed both in CoreEntry (full dump/restore) and in the parasite
+	 * regs storage (--leave-running, or execution resumed after a dump error).
+	 */
+	if (rseq_cs->flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL)
+		return 0;
+
+	pr_warn("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n", tid);
+
+	TI_IP(core) = rseq_cs->abort_ip;
+
+	if (item->pid->real == tid)
+		compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip);
+	else
+		compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip);
+
+	return 0;
+}
+
 static int dump_task_thread(struct parasite_ctl *parasite_ctl, const struct pstree_item *item, int id)
 {
 	struct parasite_thread_ctl *tctl = dmpi(item)->thread_ctls[id];
@@ -890,6 +993,12 @@ static int dump_task_thread(struct parasite_ctl *parasite_ctl, const struct pstr
 	core->thread_core->creds->lsm_profile = dmpi(item)->thread_lsms[id]->profile;
 	core->thread_core->creds->lsm_sockcreate = dmpi(item)->thread_lsms[0]->sockcreate;
 
+	ret = fixup_thread_rseq(item, id);
+	if (ret) {
+		pr_err("Can't fixup rseq for pid %d\n", pid);
+		goto err;
+	}
+
 	img = open_image(CR_FD_CORE, O_DUMP, tid->ns[0].virt);
 	if (!img)
 		goto err;
@@ -898,6 +1007,7 @@ static int dump_task_thread(struct parasite_ctl *parasite_ctl, const struct pstr
 
 	close_image(img);
 err:
+	compel_release_thread(tctl);
 	pr_info("----------------------------------------\n");
 	return ret;
 }
@@ -912,7 +1022,7 @@ static int dump_one_zombie(const struct pstree_item *item, const struct proc_pid
 	if (!core)
 		return -1;
 
-	strlcpy((char *)core->tc->comm, pps->comm, TASK_COMM_LEN);
+	__strlcpy((char *)core->tc->comm, pps->comm, TASK_COMM_LEN);
 	core->tc->task_state = TASK_DEAD;
 	core->tc->exit_code = pps->exit_code;
 
@@ -1034,7 +1144,7 @@ static int dump_task_signals(pid_t pid, struct pstree_item *item)
 	return 0;
 }
 
-static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseqc, struct rseq_cs *rseq_cs,
+static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseqc, struct criu_rseq_cs *rseq_cs,
 			struct criu_rseq *rseq)
 {
 	int ret;
@@ -1065,10 +1175,11 @@ static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseqc, st
 	if (!rseq->rseq_cs)
 		return 0;
 
-	ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(rseq->rseq_cs), sizeof(struct rseq_cs));
+	ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(rseq->rseq_cs), sizeof(struct criu_rseq_cs));
 	if (ret) {
 		pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs struct\n", tid,
-		       (unsigned long)rseq_cs, (unsigned long)rseq->rseq_cs, (unsigned long)sizeof(struct rseq_cs));
+		       (unsigned long)rseq_cs, (unsigned long)rseq->rseq_cs,
+		       (unsigned long)sizeof(struct criu_rseq_cs));
 		return -1;
 	}
 
@@ -1083,7 +1194,7 @@ static int dump_thread_rseq(struct pstree_item *item, int i)
 	CoreEntry *core = item->core[i];
 	RseqEntry **rseqep = &core->thread_core->rseq_entry;
 	struct criu_rseq rseq = {};
-	struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
+	struct criu_rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
 	pid_t tid = item->threads[i].real;
 
 	/*
@@ -1149,7 +1260,7 @@ err:
 static int dump_task_rseq(pid_t pid, struct pstree_item *item)
 {
 	int i;
-	struct rseq_cs *thread_rseq_cs;
+	struct criu_rseq_cs *thread_rseq_cs;
 
 	/* if rseq() syscall isn't supported then nothing to dump */
 	if (!kdat.has_rseq)
@@ -1174,95 +1285,11 @@ free_rseq:
 	return -1;
 }
 
-static bool task_in_rseq(struct rseq_cs *rseq_cs, uint64_t addr)
-{
-	return addr >= rseq_cs->start_ip && addr < rseq_cs->start_ip + rseq_cs->post_commit_offset;
-}
-
-static int fixup_thread_rseq(struct pstree_item *item, int i)
-{
-	CoreEntry *core = item->core[i];
-	struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
-	pid_t tid = item->threads[i].real;
-
-	/* equivalent to (struct rseq)->rseq_cs is NULL */
-	if (!rseq_cs->start_ip)
-		return 0;
-
-	pr_debug(
-		"fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n",
-		tid, rseq_cs->start_ip, rseq_cs->abort_ip, rseq_cs->post_commit_offset, rseq_cs->flags,
-		rseq_cs->version, (unsigned long)TI_IP(core));
-
-	if (rseq_cs->version != 0) {
-		pr_err("unsupported RSEQ ABI version = %d\n", rseq_cs->version);
-		return -1;
-	}
-
-	if (task_in_rseq(rseq_cs, TI_IP(core))) {
-		struct pid *tid = &item->threads[i];
-
-		/*
-		 * We need to fixup task instruction pointer from
-		 * the original one (which lays inside rseq critical section)
-		 * to rseq abort handler address. But we need to look on rseq_cs->flags
-		 * (please refer to struct rseq -> flags field description).
-		 * Naive idea of flags support may be like... let's change instruction pointer (IP)
-		 * to rseq_cs->abort_ip if !(rseq_cs->flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL).
-		 * But unfortunately, it doesn't work properly, because the kernel does
-		 * clean up of rseq_cs field in the struct rseq (modifies userspace memory).
-		 * So, we need to preserve original value of (struct rseq)->rseq_cs field in the
-		 * image and restore it's value before releasing threads (see restore_rseq_cs()).
-		 *
-		 * It's worth to mention that we need to fixup IP in CoreEntry
-		 * (used when full dump/restore is performed) and also in
-		 * the parasite regs storage (used if --leave-running option is used,
-		 * or if dump error occurred and process execution is resumed).
-		 */
-
-		if (!(rseq_cs->flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL)) {
-			pr_warn("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n",
-				tid->real);
-
-			TI_IP(core) = rseq_cs->abort_ip;
-
-			if (item->pid->real == tid->real) {
-				compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip);
-			} else {
-				compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip);
-			}
-		}
-	}
-
-	return 0;
-}
-
-static int fixup_task_rseq(pid_t pid, struct pstree_item *item)
-{
-	int ret = 0;
-	int i;
-
-	if (!kdat.has_ptrace_get_rseq_conf)
-		return 0;
-
-	for (i = 0; i < item->nr_threads; i++) {
-		if (fixup_thread_rseq(item, i)) {
-			ret = -1;
-			goto exit;
-		}
-	}
-
-exit:
-	xfree(dmpi(item)->thread_rseq_cs);
-	dmpi(item)->thread_rseq_cs = NULL;
-	return ret;
-}
-
 static struct proc_pid_stat pps_buf;
 
 static int dump_task_threads(struct parasite_ctl *parasite_ctl, const struct pstree_item *item)
 {
-	int i;
+	int i, ret = 0;
 
 	for (i = 0; i < item->nr_threads; i++) {
 		/* Leader is already dumped */
@@ -1270,11 +1297,14 @@ static int dump_task_threads(struct parasite_ctl *parasite_ctl, const struct pst
 			item->threads[i].ns[0].virt = vpid(item);
 			continue;
 		}
-		if (dump_task_thread(parasite_ctl, item, i))
-			return -1;
+		ret = dump_task_thread(parasite_ctl, item, i);
+		if (ret)
+			break;
 	}
 
-	return 0;
+	xfree(dmpi(item)->thread_rseq_cs);
+	dmpi(item)->thread_rseq_cs = NULL;
+	return ret;
 }
 
 /*
@@ -1383,7 +1413,7 @@ static int dump_zombies(void)
 		item->sid = pps_buf.sid;
 		item->pgid = pps_buf.pgid;
 
-		BUG_ON(!list_empty(&item->children));
+		BUG_ON(has_children(item));
 
 		if (!item->sid) {
 			pr_err("A session leader of zombie process %d(%d) is outside of its pid namespace\n",
@@ -1403,6 +1433,39 @@ err:
 	return ret;
 }
 
+/* Dump the cgroup set of each non-leader thread of item; returns 0, or -1 on failure. */
+static int dump_task_cgroup(struct parasite_ctl *parasite_ctl, const struct pstree_item *item)
+{
+	struct parasite_dump_cgroup_args cgroup_args;
+	int idx;
+
+	BUILD_BUG_ON(sizeof(cgroup_args) < PARASITE_ARG_SIZE_MIN);
+	for (idx = 0; idx < item->nr_threads; idx++) {
+		struct parasite_dump_cgroup_args *ns_info = NULL;
+		CoreEntry *core = item->core[idx];
+
+		/* The leader's cgroup set has been dumped already */
+		if (item->threads[idx].real == item->pid->real)
+			continue;
+
+		/* Cgroup namespaces are not allowed to nest, so all tasks share one
+		 * and only the root task's threads need the namespace-relative dump.
+		 */
+		if (item->ids->has_cgroup_ns_id && !item->parent) {
+			ns_info = &cgroup_args;
+			sprintf(cgroup_args.thread_cgrp, "self/task/%d/cgroup", item->threads[idx].ns[0].virt);
+			if (parasite_dump_cgroup(parasite_ctl, ns_info))
+				return -1;
+		}
+
+		core->thread_core->has_cg_set = true;
+		if (dump_thread_cgroup(item, &core->thread_core->cg_set, ns_info, idx))
+			return -1;
+	}
+
+	return 0;
+}
+
 static int pre_dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
 {
 	pid_t pid = item->pid->real;
@@ -1415,7 +1478,7 @@ static int pre_dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie
 	vm_area_list_init(&vmas);
 
 	pr_info("========================================\n");
-	pr_info("Pre-dumping task (pid: %d)\n", pid);
+	pr_info("Pre-dumping task (pid: %d comm: %s)\n", pid, __task_comm_info(pid));
 	pr_info("========================================\n");
 
 	/*
@@ -1505,7 +1568,7 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
 	vm_area_list_init(&vmas);
 
 	pr_info("========================================\n");
-	pr_info("Dumping task (pid: %d)\n", pid);
+	pr_info("Dumping task (pid: %d comm: %s)\n", pid, __task_comm_info(pid));
 	pr_info("========================================\n");
 
 	if (item->pid->state == TASK_DEAD)
@@ -1565,7 +1628,7 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
 		goto err;
 	}
 
-	ret = fixup_task_rseq(pid, item);
+	ret = fixup_thread_rseq(item, 0);
 	if (ret) {
 		pr_err("Fixup rseq for %d failed %d\n", pid, ret);
 		goto err;
@@ -1675,6 +1738,12 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
 		goto err_cure;
 	}
 
+	ret = dump_task_cgroup(parasite_ctl, item);
+	if (ret) {
+		pr_err("Dump cgroup of threads in process (pid: %d) failed with %d\n", pid, ret);
+		goto err_cure;
+	}
+
 	ret = compel_stop_daemon(parasite_ctl);
 	if (ret) {
 		pr_err("Can't stop daemon in parasite (pid: %d)\n", pid);
@@ -1983,7 +2052,6 @@ static int cr_dump_finish(int ret)
 	if (bfd_flush_images())
 		ret = -1;
 
-	cr_plugin_fini(CR_PLUGIN_STAGE__DUMP, ret);
 	cgp_fini();
 
 	if (!ret) {
@@ -2037,6 +2105,9 @@ static int cr_dump_finish(int ret)
 
 	if (arch_set_thread_regs(root_item, true) < 0)
 		return -1;
+
+	cr_plugin_fini(CR_PLUGIN_STAGE__DUMP, ret);
+
 	pstree_switch_state(root_item, (ret || post_dump_ret) ? TASK_ALIVE : opts.final_state);
 	timing_stop(TIME_FROZEN);
 	free_pstree(root_item);
@@ -2049,7 +2120,11 @@ static int cr_dump_finish(int ret)
 	close_service_fd(CR_PROC_FD_OFF);
 	close_image_dir();
 
-	if (ret) {
+	if (ret || post_dump_ret) {
+		if (fault_injected(FI_DUMP_CRASH)) {
+			pr_info("fault: CRIU dump crashed!\n");
+			abort();
+		}
 		pr_err("Dumping FAILED.\n");
 	} else {
 		write_stats(DUMP_STATS);
@@ -2063,11 +2138,13 @@ int cr_dump_tasks(pid_t pid)
 	InventoryEntry he = INVENTORY_ENTRY__INIT;
 	InventoryEntry *parent_ie = NULL;
 	struct pstree_item *item;
-	int pre_dump_ret = 0;
-	int ret = -1;
+	int ret;
+	int exit_code = -1;
+
+	kerndat_warn_about_madv_guards();
 
 	pr_info("========================================\n");
-	pr_info("Dumping processes (pid: %d)\n", pid);
+	pr_info("Dumping processes (pid: %d comm: %s)\n", pid, __task_comm_info(pid));
 	pr_info("========================================\n");
 
 	/*
@@ -2082,9 +2159,9 @@ int cr_dump_tasks(pid_t pid)
 		goto err;
 	root_item->pid->real = pid;
 
-	pre_dump_ret = run_scripts(ACT_PRE_DUMP);
-	if (pre_dump_ret != 0) {
-		pr_err("Pre dump script failed with %d!\n", pre_dump_ret);
+	ret = run_scripts(ACT_PRE_DUMP);
+	if (ret != 0) {
+		pr_err("Pre dump script failed with %d!\n", ret);
 		goto err;
 	}
 	if (init_stats(DUMP_STATS))
@@ -2134,12 +2211,18 @@ int cr_dump_tasks(pid_t pid)
 	if (collect_pstree())
 		goto err;
 
+	if (checkpoint_devices())
+		goto err;
+
 	if (collect_pstree_ids())
 		goto err;
 
 	if (network_lock())
 		goto err;
 
+	if (rpc_query_external_files())
+		goto err;
+
 	if (collect_file_locks())
 		goto err;
 
@@ -2164,6 +2247,10 @@ int cr_dump_tasks(pid_t pid)
 			goto err;
 	}
 
+	ret = run_plugins(DUMP_DEVICES_LATE, pid);
+	if (ret && ret != -ENOTSUP)
+		goto err;
+
 	if (parent_ie) {
 		inventory_entry__free_unpacked(parent_ie, NULL);
 		parent_ie = NULL;
@@ -2200,49 +2287,44 @@ int cr_dump_tasks(pid_t pid)
 	 * ipc shared memory, but an ipc namespace is dumped in a child
 	 * process.
 	 */
-	ret = cr_dump_shmem();
-	if (ret)
+	if (cr_dump_shmem())
 		goto err;
 
 	if (root_ns_mask) {
-		ret = dump_namespaces(root_item, root_ns_mask);
-		if (ret)
+		if (dump_namespaces(root_item, root_ns_mask))
 			goto err;
 	}
 
 	if ((root_ns_mask & CLONE_NEWTIME) == 0) {
-		ret = dump_time_ns(0);
-		if (ret)
+		if (dump_time_ns(0))
 			goto err;
 	}
 
 	if (dump_aa_namespaces() < 0)
 		goto err;
 
-	ret = dump_cgroups();
-	if (ret)
+	if (dump_cgroups())
 		goto err;
 
-	ret = fix_external_unix_sockets();
-	if (ret)
+	if (fix_external_unix_sockets())
 		goto err;
 
-	ret = tty_post_actions();
-	if (ret)
+	if (tty_post_actions())
 		goto err;
 
-	ret = inventory_save_uptime(&he);
-	if (ret)
+	if (inventory_save_uptime(&he))
 		goto err;
 
 	he.has_pre_dump_mode = false;
+	if (found_uprobes_vma()) {
+		he.has_allow_uprobes = true;
+		he.allow_uprobes = true;
+	}
 
-	ret = write_img_inventory(&he);
-	if (ret)
-		goto err;
+	exit_code = write_img_inventory(&he);
 err:
 	if (parent_ie)
 		inventory_entry__free_unpacked(parent_ie, NULL);
 
-	return cr_dump_finish(ret);
+	return cr_dump_finish(exit_code);
 }
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 9853c0585..b92b92715 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -17,12 +17,12 @@
 #include <sys/mount.h>
 #include <sys/prctl.h>
 #include <sched.h>
+#include <linux/elf.h>
 
 #include "types.h"
 #include <compel/ptrace.h>
 #include "common/compiler.h"
 
-#include "linux/mount.h"
 #include "linux/rseq.h"
 
 #include "clone-noasan.h"
@@ -80,12 +80,15 @@
 #include "timens.h"
 #include "bpfmap.h"
 #include "apparmor.h"
+#include "pidfd.h"
 
 #include "parasite-syscall.h"
 #include "files-reg.h"
 #include <compel/plugins/std/syscall-codes.h>
 #include "compel/include/asm/syscall.h"
 
+#include "linux/mount.h"
+
 #include "protobuf.h"
 #include "images/sa.pb-c.h"
 #include "images/timer.pb-c.h"
@@ -97,6 +100,8 @@
 #include "restore.h"
 
 #include "cr-errno.h"
+#include "timer.h"
+#include "sigact.h"
 
 #ifndef arch_export_restore_thread
 #define arch_export_restore_thread __export_restore_thread
@@ -117,7 +122,6 @@ static int restore_task_with_children(void *);
 static int sigreturn_restore(pid_t pid, struct task_restore_args *ta, unsigned long alen, CoreEntry *core);
 static int prepare_restorer_blob(void);
 static int prepare_rlimits(int pid, struct task_restore_args *, CoreEntry *core);
-static int prepare_posix_timers(int pid, struct task_restore_args *ta, CoreEntry *core);
 static int prepare_signals(int pid, struct task_restore_args *, CoreEntry *core);
 
 /*
@@ -278,7 +282,7 @@ static struct collect_image_info *cinfos_files[] = {
 	&unix_sk_cinfo,	      &fifo_cinfo,     &pipe_cinfo,    &nsfile_cinfo,	    &packet_sk_cinfo,
 	&netlink_sk_cinfo,    &eventfd_cinfo,  &epoll_cinfo,   &epoll_tfd_cinfo,    &signalfd_cinfo,
 	&tunfile_cinfo,	      &timerfd_cinfo,  &inotify_cinfo, &inotify_mark_cinfo, &fanotify_cinfo,
-	&fanotify_mark_cinfo, &ext_file_cinfo, &memfd_cinfo,
+	&fanotify_mark_cinfo, &ext_file_cinfo, &memfd_cinfo, &pidfd_cinfo
 };
 
 /* These images are required to restore namespaces */
@@ -351,6 +355,10 @@ static int root_prepare_shared(void)
 	if (ret)
 		goto err;
 
+	ret = add_fake_unix_queuers();
+	if (ret)
+		goto err;
+
 	/*
 	 * This should be called with all packets collected AND all
 	 * fdescs and fles prepared BUT post-prep-s not run.
@@ -367,10 +375,6 @@ static int root_prepare_shared(void)
 	if (ret)
 		goto err;
 
-	ret = add_fake_unix_queuers();
-	if (ret)
-		goto err;
-
 	show_saved_files();
 err:
 	return ret;
@@ -406,268 +410,6 @@ static int populate_pid_proc(void)
 	return 0;
 }
 
-static rt_sigaction_t sigchld_act;
-/*
- * If parent's sigaction has blocked SIGKILL (which is non-sense),
- * this parent action is non-valid and shouldn't be inherited.
- * Used to mark parent_act* no more valid.
- */
-static rt_sigaction_t parent_act[SIGMAX];
-#ifdef CONFIG_COMPAT
-static rt_sigaction_t_compat parent_act_compat[SIGMAX];
-#endif
-
-static bool sa_inherited(int sig, rt_sigaction_t *sa)
-{
-	rt_sigaction_t *pa;
-	int i;
-
-	if (current == root_item)
-		return false; /* XXX -- inherit from CRIU? */
-
-	pa = &parent_act[sig];
-
-	/* Omitting non-valid sigaction */
-	if (pa->rt_sa_mask.sig[0] & (1 << SIGKILL))
-		return false;
-
-	for (i = 0; i < _KNSIG_WORDS; i++)
-		if (pa->rt_sa_mask.sig[i] != sa->rt_sa_mask.sig[i])
-			return false;
-
-	return pa->rt_sa_handler == sa->rt_sa_handler && pa->rt_sa_flags == sa->rt_sa_flags &&
-	       pa->rt_sa_restorer == sa->rt_sa_restorer;
-}
-
-static int restore_native_sigaction(int sig, SaEntry *e)
-{
-	rt_sigaction_t act;
-	int ret;
-
-	ASSIGN_TYPED(act.rt_sa_handler, decode_pointer(e->sigaction));
-	ASSIGN_TYPED(act.rt_sa_flags, e->flags);
-	ASSIGN_TYPED(act.rt_sa_restorer, decode_pointer(e->restorer));
-#ifdef CONFIG_MIPS
-	e->has_mask_extended = 1;
-	BUILD_BUG_ON(sizeof(e->mask) * 2 != sizeof(act.rt_sa_mask.sig));
-
-	memcpy(&(act.rt_sa_mask.sig[0]), &e->mask, sizeof(act.rt_sa_mask.sig[0]));
-	memcpy(&(act.rt_sa_mask.sig[1]), &e->mask_extended, sizeof(act.rt_sa_mask.sig[1]));
-#else
-	BUILD_BUG_ON(sizeof(e->mask) != sizeof(act.rt_sa_mask.sig));
-	memcpy(act.rt_sa_mask.sig, &e->mask, sizeof(act.rt_sa_mask.sig));
-#endif
-	if (sig == SIGCHLD) {
-		sigchld_act = act;
-		return 0;
-	}
-
-	if (sa_inherited(sig - 1, &act))
-		return 1;
-
-	/*
-	 * A pure syscall is used, because glibc
-	 * sigaction overwrites se_restorer.
-	 */
-	ret = syscall(SYS_rt_sigaction, sig, &act, NULL, sizeof(k_rtsigset_t));
-	if (ret < 0) {
-		pr_perror("Can't restore sigaction");
-		return ret;
-	}
-
-	parent_act[sig - 1] = act;
-	/* Mark SIGKILL blocked which makes compat sigaction non-valid */
-#ifdef CONFIG_COMPAT
-	parent_act_compat[sig - 1].rt_sa_mask.sig[0] |= 1 << SIGKILL;
-#endif
-
-	return 1;
-}
-
-static void *stack32;
-
-#ifdef CONFIG_COMPAT
-static bool sa_compat_inherited(int sig, rt_sigaction_t_compat *sa)
-{
-	rt_sigaction_t_compat *pa;
-	int i;
-
-	if (current == root_item)
-		return false;
-
-	pa = &parent_act_compat[sig];
-
-	/* Omitting non-valid sigaction */
-	if (pa->rt_sa_mask.sig[0] & (1 << SIGKILL))
-		return false;
-
-	for (i = 0; i < _KNSIG_WORDS; i++)
-		if (pa->rt_sa_mask.sig[i] != sa->rt_sa_mask.sig[i])
-			return false;
-
-	return pa->rt_sa_handler == sa->rt_sa_handler && pa->rt_sa_flags == sa->rt_sa_flags &&
-	       pa->rt_sa_restorer == sa->rt_sa_restorer;
-}
-
-static int restore_compat_sigaction(int sig, SaEntry *e)
-{
-	rt_sigaction_t_compat act;
-	int ret;
-
-	ASSIGN_TYPED(act.rt_sa_handler, (u32)e->sigaction);
-	ASSIGN_TYPED(act.rt_sa_flags, e->flags);
-	ASSIGN_TYPED(act.rt_sa_restorer, (u32)e->restorer);
-	BUILD_BUG_ON(sizeof(e->mask) != sizeof(act.rt_sa_mask.sig));
-	memcpy(act.rt_sa_mask.sig, &e->mask, sizeof(act.rt_sa_mask.sig));
-
-	if (sig == SIGCHLD) {
-		memcpy(&sigchld_act, &act, sizeof(rt_sigaction_t_compat));
-		return 0;
-	}
-
-	if (sa_compat_inherited(sig - 1, &act))
-		return 1;
-
-	if (!stack32) {
-		stack32 = alloc_compat_syscall_stack();
-		if (!stack32)
-			return -1;
-	}
-
-	ret = arch_compat_rt_sigaction(stack32, sig, &act);
-	if (ret < 0) {
-		pr_err("Can't restore compat sigaction: %d\n", ret);
-		return ret;
-	}
-
-	parent_act_compat[sig - 1] = act;
-	/* Mark SIGKILL blocked which makes native sigaction non-valid */
-	parent_act[sig - 1].rt_sa_mask.sig[0] |= 1 << SIGKILL;
-
-	return 1;
-}
-#else
-static int restore_compat_sigaction(int sig, SaEntry *e)
-{
-	return -1;
-}
-#endif
-
-static int prepare_sigactions_from_core(TaskCoreEntry *tc)
-{
-	int sig, i;
-
-	if (tc->n_sigactions != SIGMAX - 2) {
-		pr_err("Bad number of sigactions in the image (%d, want %d)\n", (int)tc->n_sigactions, SIGMAX - 2);
-		return -1;
-	}
-
-	pr_info("Restore on-core sigactions for %d\n", vpid(current));
-
-	for (sig = 1, i = 0; sig <= SIGMAX; sig++) {
-		int ret;
-		SaEntry *e;
-		bool sigaction_is_compat;
-
-		if (sig == SIGKILL || sig == SIGSTOP)
-			continue;
-
-		e = tc->sigactions[i++];
-		sigaction_is_compat = e->has_compat_sigaction && e->compat_sigaction;
-		if (sigaction_is_compat)
-			ret = restore_compat_sigaction(sig, e);
-		else
-			ret = restore_native_sigaction(sig, e);
-
-		if (ret < 0)
-			return ret;
-	}
-
-	return 0;
-}
-
-/* Returns number of restored signals, -1 or negative errno on fail */
-static int restore_one_sigaction(int sig, struct cr_img *img, int pid)
-{
-	bool sigaction_is_compat;
-	SaEntry *e;
-	int ret = 0;
-
-	BUG_ON(sig == SIGKILL || sig == SIGSTOP);
-
-	ret = pb_read_one_eof(img, &e, PB_SIGACT);
-	if (ret == 0) {
-		if (sig != SIGMAX_OLD + 1) { /* backward compatibility */
-			pr_err("Unexpected EOF %d\n", sig);
-			return -1;
-		}
-		pr_warn("This format of sigacts-%d.img is deprecated\n", pid);
-		return -1;
-	}
-	if (ret < 0)
-		return ret;
-
-	sigaction_is_compat = e->has_compat_sigaction && e->compat_sigaction;
-	if (sigaction_is_compat)
-		ret = restore_compat_sigaction(sig, e);
-	else
-		ret = restore_native_sigaction(sig, e);
-
-	sa_entry__free_unpacked(e, NULL);
-
-	return ret;
-}
-
-static int prepare_sigactions_from_image(void)
-{
-	int pid = vpid(current);
-	struct cr_img *img;
-	int sig, rst = 0;
-	int ret = 0;
-
-	pr_info("Restore sigacts for %d\n", pid);
-
-	img = open_image(CR_FD_SIGACT, O_RSTR, pid);
-	if (!img)
-		return -1;
-
-	for (sig = 1; sig <= SIGMAX; sig++) {
-		if (sig == SIGKILL || sig == SIGSTOP)
-			continue;
-
-		ret = restore_one_sigaction(sig, img, pid);
-		if (ret < 0)
-			break;
-		if (ret)
-			rst++;
-	}
-
-	pr_info("Restored %d/%d sigacts\n", rst, SIGMAX - 3 /* KILL, STOP and CHLD */);
-
-	close_image(img);
-	return ret;
-}
-
-static int prepare_sigactions(CoreEntry *core)
-{
-	int ret;
-
-	if (!task_alive(current))
-		return 0;
-
-	if (core->tc->n_sigactions != 0)
-		ret = prepare_sigactions_from_core(core->tc);
-	else
-		ret = prepare_sigactions_from_image();
-
-	if (stack32) {
-		free_compat_syscall_stack(stack32);
-		stack32 = NULL;
-	}
-
-	return ret;
-}
-
 static int __collect_child_pids(struct pstree_item *p, int state, unsigned int *n)
 {
 	struct pstree_item *pi;
@@ -862,6 +604,9 @@ static int prepare_proc_misc(pid_t pid, TaskCoreEntry *tc, struct task_restore_a
 	if (tc->has_child_subreaper)
 		args->child_subreaper = tc->child_subreaper;
 
+	if (tc->has_membarrier_registration_mask)
+		args->membarrier_registration_mask = tc->membarrier_registration_mask;
+
 	/* loginuid value is critical to restore */
 	if (kdat.luid == LUID_FULL && tc->has_loginuid && tc->loginuid != INVALID_UID) {
 		ret = prepare_loginuid(tc->loginuid);
@@ -878,7 +623,6 @@ static int prepare_proc_misc(pid_t pid, TaskCoreEntry *tc, struct task_restore_a
 	return 0;
 }
 
-static int prepare_itimers(int pid, struct task_restore_args *args, CoreEntry *core);
 static int prepare_mm(pid_t pid, struct task_restore_args *args);
 
 static int restore_one_alive_task(int pid, CoreEntry *core)
@@ -971,6 +715,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
 	if (setup_uffd(pid, ta))
 		return -1;
 
+	if (arch_shstk_prepare(current, core, ta))
+		return -1;
+
 	return sigreturn_restore(pid, ta, args_len, core);
 }
 
@@ -1348,7 +1095,22 @@ static inline int fork_with_pid(struct pstree_item *item)
 			return -1;
 
 		item->pid->state = ca.core->tc->task_state;
-		rsti(item)->cg_set = ca.core->tc->cg_set;
+
+		/*
+		 * Zombie tasks' cgroup is not dumped/restored.
+		 * cg_set == 0 is skipped in prepare_task_cgroup()
+		 */
+		if (item->pid->state == TASK_DEAD) {
+			rsti(item)->cg_set = 0;
+		} else {
+			if (ca.core->thread_core->has_cg_set)
+				rsti(item)->cg_set = ca.core->thread_core->cg_set;
+			else
+				rsti(item)->cg_set = ca.core->tc->cg_set;
+		}
+
+		if (ca.core->tc->has_stop_signo)
+			item->pid->stop_signo = ca.core->tc->stop_signo;
 
 		if (item->pid->state != TASK_DEAD && !task_alive(item)) {
 			pr_err("Unknown task state %d\n", item->pid->state);
@@ -1476,6 +1238,8 @@ static inline int fork_with_pid(struct pstree_item *item)
 		pr_debug("PID: real %d virt %d\n", item->pid->real, vpid(item));
 	}
 
+	arch_shstk_unlock(item, ca.core, ret);
+
 err_unlock:
 	if (!(ca.clone_flags & CLONE_NEWPID))
 		unlock_last_pid();
@@ -1742,7 +1506,7 @@ static int create_children_and_session(void)
 	return 0;
 }
 
-static int restore_task_with_children(void *_arg)
+static int __restore_task_with_children(void *_arg)
 {
 	struct cr_clone_arg *ca = _arg;
 	pid_t pid;
@@ -1778,7 +1542,7 @@ static int restore_task_with_children(void *_arg)
 	}
 
 	if (log_init_by_pid(vpid(current)))
-		return -1;
+		goto err;
 
 	if (current->parent == NULL) {
 		/*
@@ -1805,9 +1569,19 @@ static int restore_task_with_children(void *_arg)
 				goto err;
 		}
 
+		if (set_opts_cap_eff())
+			goto err;
+
 		/* Wait prepare_userns */
 		if (restore_finish_ns_stage(CR_STATE_ROOT_TASK, CR_STATE_PREPARE_NAMESPACES) < 0)
 			goto err;
+
+		/*
+		 * Since we don't support nesting of cgroup namespaces, let's
+		 * only set up the cgns (if it exists) in the init task.
+		 */
+		if (prepare_cgroup_namespace(current) < 0)
+			goto err;
 	}
 
 	if (needs_prep_creds(current) && (prepare_userns_creds()))
@@ -1819,7 +1593,7 @@ static int restore_task_with_children(void *_arg)
 	 * we will only move the root one there, others will
 	 * just have it inherited.
 	 */
-	if (prepare_task_cgroup(current) < 0)
+	if (restore_task_cgroup(current) < 0)
 		goto err;
 
 	/* Restore root task */
@@ -1924,6 +1698,19 @@ err:
 	exit(1);
 }
 
+/* Run __restore_task_with_children() on _arg through arch_shstk_trampoline(). */
+static int restore_task_with_children(void *_arg)
+{
+	struct cr_clone_arg *ca = _arg;
+
+	return arch_shstk_trampoline(ca->item, ca->core, __restore_task_with_children, ca);
+}
+
+/* Weak no-op fallback returning 0; a non-weak definition of the same
+ * symbol, if one is linked in, replaces it. */
+int __attribute((weak)) arch_ptrace_restore(int pid, struct pstree_item *item);
+int arch_ptrace_restore(int pid, struct pstree_item *item) { return 0; }
+
 static int attach_to_tasks(bool root_seized)
 {
 	struct pstree_item *item;
@@ -1960,6 +1747,12 @@ static int attach_to_tasks(bool root_seized)
 				return -1;
 			}
 
+			if (ptrace(PTRACE_SETOPTIONS, pid, NULL, PTRACE_O_TRACESYSGOOD)) {
+				pr_perror("Unable to set PTRACE_O_TRACESYSGOOD for %d", pid);
+				return -1;
+			}
+			if (arch_ptrace_restore(pid, item))
+				return -1;
 			/*
 			 * Suspend seccomp if necessary. We need to do this because
 			 * although seccomp is restored at the very end of the
@@ -2024,9 +1817,10 @@ static int restore_rseq_cs(void)
 	return 0;
 }
 
-static int catch_tasks(bool root_seized, enum trace_flags *flag)
+static int catch_tasks(bool root_seized)
 {
 	struct pstree_item *item;
+	bool nobp = fault_injected(FI_NO_BREAKPOINTS) || !kdat.has_breakpoints;
 
 	for_each_pstree_item(item) {
 		int status, i, ret;
@@ -2054,7 +1848,7 @@ static int catch_tasks(bool root_seized, enum trace_flags *flag)
 				return -1;
 			}
 
-			ret = compel_stop_pie(pid, rsti(item)->breakpoint, flag, fault_injected(FI_NO_BREAKPOINTS));
+			ret = compel_stop_pie(pid, rsti(item)->breakpoint, nobp);
 			if (ret < 0)
 				return -1;
 		}
@@ -2063,24 +1857,6 @@ static int catch_tasks(bool root_seized, enum trace_flags *flag)
 	return 0;
 }
 
-static int clear_breakpoints(void)
-{
-	struct pstree_item *item;
-	int ret = 0, i;
-
-	if (fault_injected(FI_NO_BREAKPOINTS))
-		return 0;
-
-	for_each_pstree_item(item) {
-		if (!task_alive(item))
-			continue;
-		for (i = 0; i < item->nr_threads; i++)
-			ret |= ptrace_flush_breakpoints(item->threads[i].real);
-	}
-
-	return ret;
-}
-
 static void finalize_restore(void)
 {
 	struct pstree_item *item;
@@ -2104,8 +1880,14 @@ static void finalize_restore(void)
 
 		xfree(ctl);
 
-		if ((item->pid->state == TASK_STOPPED) || (opts.final_state == TASK_STOPPED))
+		if (opts.final_state == TASK_STOPPED)
 			kill(item->pid->real, SIGSTOP);
+		else if (item->pid->state == TASK_STOPPED) {
+			if (item->pid->stop_signo > 0)
+				kill(item->pid->real, item->pid->stop_signo);
+			else
+				kill(item->pid->real, SIGSTOP);
+		}
 	}
 }
 
@@ -2215,7 +1997,6 @@ static void reap_zombies(void)
 
 static int restore_root_task(struct pstree_item *init)
 {
-	enum trace_flags flag = TRACE_ALL;
 	int ret, fd, mnt_ns_fd = -1;
 	int root_seized = 0;
 	struct pstree_item *item;
@@ -2339,7 +2120,7 @@ static int restore_root_task(struct pstree_item *init)
 		 * the '--empty-ns net' mode no iptables C/R is done and we
 		 * need to return these rules by hands.
 		 */
-		ret = network_lock_internal();
+		ret = network_lock_internal(/* restore = */ true);
 		if (ret)
 			goto out_kill;
 	}
@@ -2351,6 +2132,9 @@ static int restore_root_task(struct pstree_item *init)
 	__restore_switch_stage(CR_STATE_FORKING);
 
 skip_ns_bouncing:
+	ret = run_plugins(POST_FORKING);
+	if (ret < 0 && ret != -ENOTSUP)
+		goto out_kill;
 
 	ret = restore_wait_inprogress_tasks();
 	if (ret < 0)
@@ -2378,6 +2162,10 @@ skip_ns_bouncing:
 	if (ret < 0)
 		goto out_kill;
 
+	ret = stop_cgroupd();
+	if (ret < 0)
+		goto out_kill;
+
 	ret = move_veth_to_bridge();
 	if (ret < 0)
 		goto out_kill;
@@ -2430,7 +2218,7 @@ skip_ns_bouncing:
 
 	timing_stop(TIME_RESTORE);
 
-	if (catch_tasks(root_seized, &flag)) {
+	if (catch_tasks(root_seized)) {
 		pr_err("Can't catch all tasks\n");
 		goto out_kill_network_unlocked;
 	}
@@ -2440,16 +2228,18 @@ skip_ns_bouncing:
 
 	__restore_switch_stage(CR_STATE_COMPLETE);
 
-	ret = compel_stop_on_syscall(task_entries->nr_threads, __NR(rt_sigreturn, 0), __NR(rt_sigreturn, 1), flag);
+	ret = compel_stop_on_syscall(task_entries->nr_threads, __NR(rt_sigreturn, 0), __NR(rt_sigreturn, 1));
 	if (ret) {
 		pr_err("Can't stop all tasks on rt_sigreturn\n");
 		goto out_kill_network_unlocked;
 	}
 
-	if (clear_breakpoints())
-		pr_err("Unable to flush breakpoints\n");
-
 	finalize_restore();
+
+	/* just before releasing threads we have to restore rseq_cs */
+	if (restore_rseq_cs())
+		pr_err("Unable to restore rseq_cs state\n");
+
 	/*
 	 * Some external devices such as GPUs might need a very late
 	 * trigger to kick-off some events, memory notifiers and for
@@ -2459,8 +2249,10 @@ skip_ns_bouncing:
 	 * mapped memory) could be done sanely once the pie code hands
 	 * over the control to master process.
 	 */
+	pr_info("Run late stage hook from criu master for external devices\n");
 	for_each_pstree_item(item) {
-		pr_info("Run late stage hook from criu master for external devices\n");
+		if (!task_alive(item))
+			continue;
 		ret = run_plugins(RESUME_DEVICES_LATE, item->pid->real);
 		/*
 		 * This may not really be an error. Only certain plugin hooks
@@ -2470,7 +2262,7 @@ skip_ns_bouncing:
 		 * might actually be a true error code but that would be also
 		 * captured in the plugin so no need to print the error here.
 		 */
-		if (ret < 0)
+		if (ret < 0 && ret != -ENOTSUP)
 			pr_debug("restore late stage hook for external plugin failed\n");
 	}
 
@@ -2481,10 +2273,6 @@ skip_ns_bouncing:
 	if (restore_freezer_state())
 		pr_err("Unable to restore freezer state\n");
 
-	/* just before releasing threads we have to restore rseq_cs */
-	if (restore_rseq_cs())
-		pr_err("Unable to restore rseq_cs state\n");
-
 	/* Detaches from processes and they continue run through sigreturn. */
 	if (finalize_restore_detach())
 		goto out_kill_network_unlocked;
@@ -2551,6 +2339,7 @@ int prepare_task_entries(void)
 	task_entries->nr_helpers = 0;
 	futex_set(&task_entries->start, CR_STATE_FAIL);
 	mutex_init(&task_entries->userns_sync_lock);
+	mutex_init(&task_entries->cgroupd_sync_lock);
 	mutex_init(&task_entries->last_pid_mutex);
 
 	return 0;
@@ -2576,42 +2365,48 @@ int cr_restore_tasks(void)
 	if (init_service_fd())
 		return 1;
 
-	if (cr_plugin_init(CR_PLUGIN_STAGE__RESTORE))
+	if (check_img_inventory(/* restore = */ true) < 0)
 		return -1;
 
-	if (check_img_inventory(/* restore = */ true) < 0)
-		goto err;
-
 	if (init_stats(RESTORE_STATS))
-		goto err;
+		return -1;
 
 	if (lsm_check_opts())
-		goto err;
+		return -1;
 
 	timing_start(TIME_RESTORE);
 
 	if (cpu_init() < 0)
-		goto err;
+		return -1;
 
 	if (vdso_init_restore())
-		goto err;
+		return -1;
 
 	if (tty_init_restore())
-		goto err;
+		return -1;
 
 	if (opts.cpu_cap & CPU_CAP_IMAGE) {
 		if (cpu_validate_cpuinfo())
-			goto err;
+			return -1;
 	}
 
 	if (prepare_task_entries() < 0)
-		goto err;
+		return -1;
 
 	if (prepare_pstree() < 0)
-		goto err;
+		return -1;
 
 	if (fdstore_init())
-		goto err;
+		return -1;
+
+	/*
+	 * For the AMDGPU plugin, its parallel restore feature needs to use fdstore to store
+	 * its socket file descriptor. This allows the main process and the target process to
+	 * communicate with each other through this file descriptor. Therefore, cr_plugin_init
+	 * must be initialized after fdstore_init.
+	 */
+	if (cr_plugin_init(CR_PLUGIN_STAGE__RESTORE))
+		return -1;
 
 	if (inherit_fd_move_to_fdstore())
 		goto err;
@@ -2636,23 +2431,24 @@ err:
 	return ret;
 }
 
-static long restorer_get_vma_hint(struct list_head *tgt_vma_list, struct list_head *self_vma_list, long vma_len)
+static long restorer_get_vma_hint(struct list_head *tgt_vma_list, struct list_head *self_vma_list, long min_addr, long vma_len)
 {
 	struct vma_area *t_vma, *s_vma;
-	long prev_vma_end = 0;
+	long prev_vma_end = min_addr;
 	struct vma_area end_vma;
 	VmaEntry end_e;
 
 	end_vma.e = &end_e;
 	end_e.start = end_e.end = kdat.task_size;
-	prev_vma_end = kdat.mmap_min_addr;
+	INIT_LIST_HEAD(&end_vma.list);
 
 	s_vma = list_first_entry(self_vma_list, struct vma_area, list);
 	t_vma = list_first_entry(tgt_vma_list, struct vma_area, list);
 
 	while (1) {
 		if (prev_vma_end + vma_len > s_vma->e->start) {
-			if (s_vma->list.next == self_vma_list) {
+			if ((s_vma->list.next == self_vma_list) ||
+			    vma_area_is(vma_next(s_vma), VMA_AREA_GUARD)) {
 				s_vma = &end_vma;
 				continue;
 			}
@@ -2665,7 +2461,8 @@ static long restorer_get_vma_hint(struct list_head *tgt_vma_list, struct list_he
 		}
 
 		if (prev_vma_end + vma_len > t_vma->e->start) {
-			if (t_vma->list.next == tgt_vma_list) {
+			if ((t_vma->list.next == tgt_vma_list) ||
+			    vma_area_is(vma_next(t_vma), VMA_AREA_GUARD)) {
 				t_vma = &end_vma;
 				continue;
 			}
@@ -2683,251 +2480,6 @@ static long restorer_get_vma_hint(struct list_head *tgt_vma_list, struct list_he
 	return -1;
 }
 
-static inline int timeval_valid(struct timeval *tv)
-{
-	return (tv->tv_sec >= 0) && ((unsigned long)tv->tv_usec < USEC_PER_SEC);
-}
-
-static inline int decode_itimer(char *n, ItimerEntry *ie, struct itimerval *val)
-{
-	if (ie->isec == 0 && ie->iusec == 0) {
-		memzero_p(val);
-		return 0;
-	}
-
-	val->it_interval.tv_sec = ie->isec;
-	val->it_interval.tv_usec = ie->iusec;
-
-	if (!timeval_valid(&val->it_interval)) {
-		pr_err("Invalid timer interval\n");
-		return -1;
-	}
-
-	if (ie->vsec == 0 && ie->vusec == 0) {
-		/*
-		 * Remaining time was too short. Set it to
-		 * interval to make the timer armed and work.
-		 */
-		val->it_value.tv_sec = ie->isec;
-		val->it_value.tv_usec = ie->iusec;
-	} else {
-		val->it_value.tv_sec = ie->vsec;
-		val->it_value.tv_usec = ie->vusec;
-	}
-
-	if (!timeval_valid(&val->it_value)) {
-		pr_err("Invalid timer value\n");
-		return -1;
-	}
-
-	pr_info("Restored %s timer to %ld.%ld -> %ld.%ld\n", n, val->it_value.tv_sec, val->it_value.tv_usec,
-		val->it_interval.tv_sec, val->it_interval.tv_usec);
-
-	return 0;
-}
-
-/*
- * Legacy itimers restore from CR_FD_ITIMERS
- */
-
-static int prepare_itimers_from_fd(int pid, struct task_restore_args *args)
-{
-	int ret = -1;
-	struct cr_img *img;
-	ItimerEntry *ie;
-
-	if (!deprecated_ok("Itimers"))
-		return -1;
-
-	img = open_image(CR_FD_ITIMERS, O_RSTR, pid);
-	if (!img)
-		return -1;
-
-	ret = pb_read_one(img, &ie, PB_ITIMER);
-	if (ret < 0)
-		goto out;
-	ret = decode_itimer("real", ie, &args->itimers[0]);
-	itimer_entry__free_unpacked(ie, NULL);
-	if (ret < 0)
-		goto out;
-
-	ret = pb_read_one(img, &ie, PB_ITIMER);
-	if (ret < 0)
-		goto out;
-	ret = decode_itimer("virt", ie, &args->itimers[1]);
-	itimer_entry__free_unpacked(ie, NULL);
-	if (ret < 0)
-		goto out;
-
-	ret = pb_read_one(img, &ie, PB_ITIMER);
-	if (ret < 0)
-		goto out;
-	ret = decode_itimer("prof", ie, &args->itimers[2]);
-	itimer_entry__free_unpacked(ie, NULL);
-	if (ret < 0)
-		goto out;
-out:
-	close_image(img);
-	return ret;
-}
-
-static int prepare_itimers(int pid, struct task_restore_args *args, CoreEntry *core)
-{
-	int ret = 0;
-	TaskTimersEntry *tte = core->tc->timers;
-
-	if (!tte)
-		return prepare_itimers_from_fd(pid, args);
-
-	ret |= decode_itimer("real", tte->real, &args->itimers[0]);
-	ret |= decode_itimer("virt", tte->virt, &args->itimers[1]);
-	ret |= decode_itimer("prof", tte->prof, &args->itimers[2]);
-
-	return ret;
-}
-
-static inline int timespec_valid(struct timespec *ts)
-{
-	return (ts->tv_sec >= 0) && ((unsigned long)ts->tv_nsec < NSEC_PER_SEC);
-}
-
-static inline int decode_posix_timer(PosixTimerEntry *pte, struct restore_posix_timer *pt)
-{
-	pt->val.it_interval.tv_sec = pte->isec;
-	pt->val.it_interval.tv_nsec = pte->insec;
-
-	if (!timespec_valid(&pt->val.it_interval)) {
-		pr_err("Invalid timer interval(posix)\n");
-		return -1;
-	}
-
-	if (pte->vsec == 0 && pte->vnsec == 0) {
-		/*
-		 * Remaining time was too short. Set it to
-		 * interval to make the timer armed and work.
-		 */
-		pt->val.it_value.tv_sec = pte->isec;
-		pt->val.it_value.tv_nsec = pte->insec;
-	} else {
-		pt->val.it_value.tv_sec = pte->vsec;
-		pt->val.it_value.tv_nsec = pte->vnsec;
-	}
-
-	if (!timespec_valid(&pt->val.it_value)) {
-		pr_err("Invalid timer value(posix)\n");
-		return -1;
-	}
-
-	pt->spt.it_id = pte->it_id;
-	pt->spt.clock_id = pte->clock_id;
-	pt->spt.si_signo = pte->si_signo;
-	pt->spt.it_sigev_notify = pte->it_sigev_notify;
-	pt->spt.sival_ptr = decode_pointer(pte->sival_ptr);
-	pt->spt.notify_thread_id = pte->notify_thread_id;
-	pt->overrun = pte->overrun;
-
-	return 0;
-}
-
-static int cmp_posix_timer_proc_id(const void *p1, const void *p2)
-{
-	return ((struct restore_posix_timer *)p1)->spt.it_id - ((struct restore_posix_timer *)p2)->spt.it_id;
-}
-
-static void sort_posix_timers(struct task_restore_args *ta)
-{
-	void *tmem;
-
-	/*
-	 * This is required for restorer's create_posix_timers(),
-	 * it will probe them one-by-one for the desired ID, since
-	 * kernel doesn't provide another API for timer creation
-	 * with given ID.
-	 */
-
-	if (ta->posix_timers_n > 0) {
-		tmem = rst_mem_remap_ptr((unsigned long)ta->posix_timers, RM_PRIVATE);
-		qsort(tmem, ta->posix_timers_n, sizeof(struct restore_posix_timer), cmp_posix_timer_proc_id);
-	}
-}
-
-/*
- * Legacy posix timers restoration from CR_FD_POSIX_TIMERS
- */
-
-static int prepare_posix_timers_from_fd(int pid, struct task_restore_args *ta)
-{
-	struct cr_img *img;
-	int ret = -1;
-	struct restore_posix_timer *t;
-
-	if (!deprecated_ok("Posix timers"))
-		return -1;
-
-	img = open_image(CR_FD_POSIX_TIMERS, O_RSTR, pid);
-	if (!img)
-		return -1;
-
-	ta->posix_timers_n = 0;
-	while (1) {
-		PosixTimerEntry *pte;
-
-		ret = pb_read_one_eof(img, &pte, PB_POSIX_TIMER);
-		if (ret <= 0)
-			break;
-
-		t = rst_mem_alloc(sizeof(struct restore_posix_timer), RM_PRIVATE);
-		if (!t)
-			break;
-
-		ret = decode_posix_timer(pte, t);
-		if (ret < 0)
-			break;
-
-		posix_timer_entry__free_unpacked(pte, NULL);
-		ta->posix_timers_n++;
-	}
-
-	close_image(img);
-	if (!ret)
-		sort_posix_timers(ta);
-
-	return ret;
-}
-
-static int prepare_posix_timers(int pid, struct task_restore_args *ta, CoreEntry *core)
-{
-	int i, ret = -1;
-	TaskTimersEntry *tte = core->tc->timers;
-	struct restore_posix_timer *t;
-
-	ta->posix_timers = (struct restore_posix_timer *)rst_mem_align_cpos(RM_PRIVATE);
-
-	if (!tte)
-		return prepare_posix_timers_from_fd(pid, ta);
-
-	ta->posix_timers_n = tte->n_posix;
-	for (i = 0; i < ta->posix_timers_n; i++) {
-		t = rst_mem_alloc(sizeof(struct restore_posix_timer), RM_PRIVATE);
-		if (!t)
-			goto out;
-
-		if (decode_posix_timer(tte->posix[i], t))
-			goto out;
-	}
-
-	ret = 0;
-	sort_posix_timers(ta);
-out:
-	return ret;
-}
-
-static inline int verify_cap_size(CredsEntry *ce)
-{
-	return ((ce->n_cap_inh == CR_CAP_SIZE) && (ce->n_cap_eff == CR_CAP_SIZE) && (ce->n_cap_prm == CR_CAP_SIZE) &&
-		(ce->n_cap_bnd == CR_CAP_SIZE));
-}
-
 static int prepare_mm(pid_t pid, struct task_restore_args *args)
 {
 	int exe_fd, i, ret = -1;
@@ -2953,7 +2505,7 @@ static int prepare_mm(pid_t pid, struct task_restore_args *args)
 
 	args->fd_exe_link = exe_fd;
 
-	args->has_thp_enabled = rsti(current)->has_thp_enabled;
+	args->thp_disabled = mm->has_thp_disabled && mm->thp_disabled;
 
 	ret = 0;
 out:
@@ -3019,6 +2571,17 @@ static int remap_restorer_blob(void *addr)
 	restorer_setup_c_header_desc(&pbd, true);
 	compel_relocs_apply(addr, addr, &pbd);
 
+	/*
+	 * Ensure the infected thread sees the updated code.
+	 *
+	 * On architectures like ARM64, the Data Cache (D-cache) and
+	 * Instruction Cache (I-cache) are not automatically coherent.
+	 * Modifications land in the D-cache, so we must flush (clean) the
+	 * D-cache to push changes to RAM to ensure the CPU fetches the updated
+	 * instructions.
+	 */
+	__builtin___clear_cache(addr, addr + pbd.hdr.bsize);
+
 	return 0;
 }
 
@@ -3027,7 +2590,7 @@ static int validate_sched_parm(struct rst_sched_param *sp)
 	if ((sp->nice < -20) || (sp->nice > 19))
 		return 0;
 
-	switch (sp->policy) {
+	switch (sp->policy & ~SCHED_RESET_ON_FORK) {
 	case SCHED_RR:
 	case SCHED_FIFO:
 		return ((sp->prio > 0) && (sp->prio < 100));
@@ -3078,7 +2641,6 @@ static int prep_rseq(struct rst_rseq_param *rseq, ThreadCoreEntry *tc)
 	return 0;
 }
 
-#if defined(__GLIBC__) && defined(RSEQ_SIG)
 static void prep_libc_rseq_info(struct rst_rseq_param *rseq)
 {
 	if (!kdat.has_rseq) {
@@ -3086,23 +2648,29 @@ static void prep_libc_rseq_info(struct rst_rseq_param *rseq)
 		return;
 	}
 
-	rseq->rseq_abi_pointer = encode_pointer(__criu_thread_pointer() + __rseq_offset);
-	rseq->rseq_abi_size = __rseq_size;
-	rseq->signature = RSEQ_SIG;
-}
+	if (!kdat.has_ptrace_get_rseq_conf) {
+#if defined(__GLIBC__) && defined(RSEQ_SIG)
+		rseq->rseq_abi_pointer = encode_pointer(__criu_thread_pointer() + __rseq_offset);
+		/*
+		 * Current glibc reports the feature/active size in
+		 * __rseq_size, not the size passed to the kernel.
+		 * This could be 20, but older kernels expect 32 for
+		 * the size argument even if only 20 bytes are used.
+		 */
+		rseq->rseq_abi_size = __rseq_size;
+		if (rseq->rseq_abi_size < 32)
+			rseq->rseq_abi_size = 32;
+		rseq->signature = RSEQ_SIG;
 #else
-static void prep_libc_rseq_info(struct rst_rseq_param *rseq)
-{
-	/*
-	 * TODO: handle built-in rseq on other libc'ies like musl
-	 * We can do that using get_rseq_conf kernel feature.
-	 *
-	 * For now we just assume that other libc libraries are
-	 * not registering rseq by default.
-	 */
-	rseq->rseq_abi_pointer = 0;
-}
+		rseq->rseq_abi_pointer = 0;
 #endif
+		return;
+	}
+
+	rseq->rseq_abi_pointer = kdat.libc_rseq_conf.rseq_abi_pointer;
+	rseq->rseq_abi_size = kdat.libc_rseq_conf.rseq_abi_size;
+	rseq->signature = kdat.libc_rseq_conf.signature;
+}
 
 static rlim_t decode_rlim(rlim_t ival)
 {
@@ -3345,17 +2913,31 @@ static bool groups_match(gid_t *groups, int n_groups)
 	return ret;
 }
 
+/* Copy n_words of caps into out_caps (CR_CAP_SIZE words), clearing bits above kdat.last_cap. */
+static void copy_caps(u32 *out_caps, u32 *in_caps, int n_words)
+{
+	int i, cap_end, cap_words = kdat.last_cap / 32 + 1; /* words holding caps 0..last_cap */
+
+	/* Only report here; the unsupported bits themselves are cleared below. */
+	for (i = kdat.last_cap + 1; i < 32 * n_words; ++i) {
+		if (in_caps[i / 32] & (1U << (i % 32)))
+			pr_warn("Dropping unsupported capability %d > %d\n", i, kdat.last_cap);
+	}
+
+	/* Trim the tail word only if the word holding last_cap is really copied. */
+	cap_end = (n_words >= cap_words) ? (kdat.last_cap & 31) + 1 : 32;
+	n_words = min(n_words, cap_words);
+	memcpy(out_caps, in_caps, sizeof(*out_caps) * n_words);
+	if ((cap_end & 31) && n_words)
+		out_caps[n_words - 1] &= (1U << cap_end) - 1;
+	memset(out_caps + n_words, 0, sizeof(*out_caps) * (CR_CAP_SIZE - n_words));
+}
+
 static struct thread_creds_args *rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos)
 {
 	unsigned long this_pos;
 	struct thread_creds_args *args;
 
-	if (!verify_cap_size(ce)) {
-		pr_err("Caps size mismatch %d %d %d %d\n", (int)ce->n_cap_inh, (int)ce->n_cap_eff, (int)ce->n_cap_prm,
-		       (int)ce->n_cap_bnd);
-		return ERR_PTR(-EINVAL);
-	}
-
 	this_pos = rst_mem_align_cpos(RM_PRIVATE);
 
 	args = rst_mem_alloc(sizeof(*args), RM_PRIVATE);
@@ -3391,7 +2973,7 @@ static struct thread_creds_args *rst_prep_creds_args(CredsEntry *ce, unsigned lo
 
 			args = rst_mem_remap_ptr(this_pos, RM_PRIVATE);
 			args->lsm_profile = lsm_profile;
-			strlcpy(args->lsm_profile, rendered, lsm_profile_len + 1);
+			__strlcpy(args->lsm_profile, rendered, lsm_profile_len + 1);
 			xfree(rendered);
 		}
 	} else {
@@ -3425,7 +3007,7 @@ static struct thread_creds_args *rst_prep_creds_args(CredsEntry *ce, unsigned lo
 
 			args = rst_mem_remap_ptr(this_pos, RM_PRIVATE);
 			args->lsm_sockcreate = lsm_sockcreate;
-			strlcpy(args->lsm_sockcreate, rendered, lsm_sockcreate_len + 1);
+			__strlcpy(args->lsm_sockcreate, rendered, lsm_sockcreate_len + 1);
 			xfree(rendered);
 		}
 	} else {
@@ -3440,13 +3022,15 @@ static struct thread_creds_args *rst_prep_creds_args(CredsEntry *ce, unsigned lo
 	args->creds.cap_eff = NULL;
 	args->creds.cap_prm = NULL;
 	args->creds.cap_bnd = NULL;
+	args->creds.cap_amb = NULL;
 	args->creds.groups = NULL;
 	args->creds.lsm_profile = NULL;
 
-	memcpy(args->cap_inh, ce->cap_inh, sizeof(args->cap_inh));
-	memcpy(args->cap_eff, ce->cap_eff, sizeof(args->cap_eff));
-	memcpy(args->cap_prm, ce->cap_prm, sizeof(args->cap_prm));
-	memcpy(args->cap_bnd, ce->cap_bnd, sizeof(args->cap_bnd));
+	copy_caps(args->cap_inh, ce->cap_inh, ce->n_cap_inh);
+	copy_caps(args->cap_eff, ce->cap_eff, ce->n_cap_eff);
+	copy_caps(args->cap_prm, ce->cap_prm, ce->n_cap_prm);
+	copy_caps(args->cap_bnd, ce->cap_bnd, ce->n_cap_bnd);
+	copy_caps(args->cap_amb, ce->cap_amb, ce->n_cap_amb);
 
 	if (ce->n_groups && !groups_match(ce->groups, ce->n_groups)) {
 		unsigned int *groups;
@@ -3549,6 +3133,9 @@ static void *restorer_munmap_addr(CoreEntry *core, void *restorer_blob)
 	return restorer_sym(restorer_blob, arch_export_unmap);
 }
 
+void arch_rsti_init(struct pstree_item *p) __attribute__((weak));
+void arch_rsti_init(struct pstree_item *p) {}
+
 static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, unsigned long alen, CoreEntry *core)
 {
 	void *mem = MAP_FAILED;
@@ -3609,7 +3196,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 
 	rst_mem_size = rst_mem_lock();
 	memzone_size = round_up(sizeof(struct restore_mem_zone) * current->nr_threads, page_size());
-	task_args->bootstrap_len = restorer_len + memzone_size + alen + rst_mem_size;
+	task_args->bootstrap_len = restorer_len + memzone_size + alen + rst_mem_size + shstk_restorer_stack_size();
 	BUG_ON(task_args->bootstrap_len & (PAGE_SIZE - 1));
 	pr_info("%d threads require %ldK of memory\n", current->nr_threads, KBYTES(task_args->bootstrap_len));
 
@@ -3639,7 +3226,9 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 	 * or inited from scratch).
 	 */
 
-	mem = (void *)restorer_get_vma_hint(&vmas->h, &self_vmas.h, task_args->bootstrap_len);
+	mem = (void *)restorer_get_vma_hint(&vmas->h, &self_vmas.h,
+					    shstk_min_mmap_addr(&task_args->shstk, kdat.mmap_min_addr),
+					    task_args->bootstrap_len);
 	if (mem == (void *)-1) {
 		pr_err("No suitable area for task_restore bootstrap (%ldK)\n", task_args->bootstrap_len);
 		goto err;
@@ -3759,11 +3348,16 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 
 	prep_libc_rseq_info(&task_args->libc_rseq);
 
+	task_args->uid = opts.uid;
+	for (i = 0; i < CR_CAP_SIZE; i++)
+		task_args->cap_eff[i] = opts.cap_eff[i];
+
 	/*
 	 * Fill up per-thread data.
 	 */
 	creds_pos_next = creds_pos;
 	siginfo_n = task_args->siginfo_n;
+	arch_rsti_init(current);
 	for (i = 0; i < current->nr_threads; i++) {
 		CoreEntry *tcore;
 		struct rt_sigframe *sigframe;
@@ -3816,6 +3410,13 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 		thread_args[i].clear_tid_addr = CORE_THREAD_ARCH_INFO(tcore)->clear_tid_addr;
 		core_get_tls(tcore, &thread_args[i].tls);
 
+		if (tcore->thread_core->has_cg_set && rsti(current)->cg_set != tcore->thread_core->cg_set) {
+			thread_args[i].cg_set = tcore->thread_core->cg_set;
+			thread_args[i].cgroupd_sk = dup(get_service_fd(CGROUPD_SK));
+		} else {
+			thread_args[i].cg_set = -1;
+		}
+
 		ret = prep_rseq(&thread_args[i].rseq, tcore->thread_core);
 		if (ret)
 			goto err;
@@ -3866,6 +3467,10 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 	 * self-vmas are unmaped.
 	 */
 	mem += rst_mem_size;
+
+	shstk_set_restorer_stack(&task_args->shstk, mem);
+	mem += shstk_restorer_stack_size();
+
 	task_args->vdso_rt_parked_at = (unsigned long)mem;
 	task_args->vdso_maps_rt = vdso_maps_rt;
 	task_args->vdso_rt_size = vdso_rt_size;
@@ -3910,6 +3515,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 	close_service_fd(USERNSD_SK);
 	close_service_fd(FDSTORE_SK_OFF);
 	close_service_fd(RPC_SK_OFF);
+	close_service_fd(CGROUPD_SK);
 
 	__gcov_flush();
 
diff --git a/criu/cr-service.c b/criu/cr-service.c
index a6eb9ebd3..dccf4ef38 100644
--- a/criu/cr-service.c
+++ b/criu/cr-service.c
@@ -14,6 +14,7 @@
 #include <sys/stat.h>
 #include <arpa/inet.h>
 #include <sched.h>
+#include <sys/prctl.h>
 
 #include "version.h"
 #include "crtools.h"
@@ -239,15 +240,165 @@ int send_criu_rpc_script(enum script_actions act, char *name, int sk, int fd)
 	return 0;
 }
 
-static char images_dir[PATH_MAX];
+/* Send a NOTIFY named `name` to the RPC client and register every external object it returns. */
+int exec_rpc_query_external_files(char *name, int sk)
+{
+	int i, ret;
+	CriuNotify cn = CRIU_NOTIFY__INIT;
+	CriuResp msg = CRIU_RESP__INIT;
+	CriuReq *req;
+
+	cn.script = name;
+	msg.type = CRIU_REQ_TYPE__NOTIFY;
+	msg.success = true;
+	msg.notify = &cn;
+	ret = send_criu_msg_with_fd(sk, &msg, -1);
+	if (ret < 0)
+		return ret;
+
+	ret = recv_criu_msg(sk, &req);
+	if (ret < 0)
+		return ret;
+
+	if (req->type != CRIU_REQ_TYPE__NOTIFY || !req->notify_success) {
+		pr_err("RPC client reported script error\n");
+		criu_req__free_unpacked(req, NULL); /* free the reply here too, as the normal exit does */
+		return -1;
+	}
+
+	ret = 0;
+	if (req->opts)
+		for (i = 0; i < req->opts->n_external; i++) {
+			char *key = req->opts->external[i];
+			pr_info("Adding external object: %s\n", key);
+			if (add_external(key)) {
+				pr_err("Failed to add external object: %s\n", key);
+				ret = -1;
+			}
+		}
+	else
+		pr_info("RPC NOTIFY %s: no `opts` returned.\n", name);
+
+	criu_req__free_unpacked(req, NULL);
+	return ret;
+}
+
+/*
+ * Fill images_dir_path (the code assumes room for PATH_MAX bytes) with the images dir to use.
+ * Returns 0 on success, or in CHECK mode with nothing set (buffer untouched); -1 otherwise.
+ */
+static int resolve_images_dir_path(char *images_dir_path,
+				   bool imgs_changed_by_rpc_conf,
+				   const CriuOpts *req,
+				   pid_t peer_pid)
+{
+	/*
+	 * Precedence: RPC configuration file, then images_dir_fd (resolved
+	 * through /proc/<peer_pid>/fd/; -1 means unset), then images_dir.
+	 *
+	 * imgs_changed_by_rpc_conf says opts.imgs_dir came from the RPC
+	 * configuration file, the only source allowed to overwrite RPC settings:
+	 *  apply_config(global_conf) -> apply_config(user_conf) ->
+	 *  apply_config(environment variable) -> apply_rpc_options() ->
+	 *  apply_config(rpc_conf)
+	 */
+	if (imgs_changed_by_rpc_conf) {
+		strncpy(images_dir_path, opts.imgs_dir, PATH_MAX - 1);
+		images_dir_path[PATH_MAX - 1] = '\0';
+	} else if (req->images_dir_fd != -1) {
+		snprintf(images_dir_path, PATH_MAX, "/proc/%d/fd/%d", peer_pid, req->images_dir_fd);
+	} else if (req->images_dir) {
+		strncpy(images_dir_path, req->images_dir, PATH_MAX - 1);
+		images_dir_path[PATH_MAX - 1] = '\0';
+	} else {
+		/*
+		 * Since images dir is not required in CHECK mode, we need to
+		 * check for work_dir_fd in setup_images_and_workdir()
+		 */
+		if (opts.mode == CR_CHECK)
+			return 0;
+		pr_err("Neither images_dir_fd nor images_dir was passed by RPC client.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Open the images dir (skipped in CHECK mode), then chdir() into the resolved work dir. */
+static int setup_images_and_workdir(const char *images_dir_path, bool work_changed_by_rpc_conf,
+				    CriuOpts *req, pid_t peer_pid)
+{
+	char work_dir_path[PATH_MAX] = "";
+
+	/* We don't need to open images dir in CHECK mode. */
+	if (opts.mode != CR_CHECK) {
+		/*
+		 * Image streaming is not supported with CRIU's service feature as
+		 * the streamer must be started for each dump/restore operation.
+		 * It is unclear how to do that with RPC, so we punt for now.
+		 * This explains why we provide the argument mode=-1 instead of
+		 * O_RSTR or O_DUMP.
+		 */
+		if (open_image_dir(images_dir_path, -1) < 0) {
+			pr_perror("Can't open images directory");
+			return -1;
+		}
+	}
+
+	/* Bounded copies into the zero-filled buffer, so it always stays NUL-terminated. */
+	if (work_changed_by_rpc_conf)
+		strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1);
+	else if (req->has_work_dir_fd)
+		snprintf(work_dir_path, sizeof(work_dir_path), "/proc/%d/fd/%d", peer_pid, req->work_dir_fd);
+	else if (opts.work_dir)
+		strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1);
+	else if (images_dir_path[0] != '\0')
+		strncpy(work_dir_path, images_dir_path, PATH_MAX - 1);
+
+	if (work_dir_path[0] == '\0') {
+		pr_err("images-dir or work-dir is required when using log file\n");
+		return -1;
+	}
+
+	if (chdir(work_dir_path)) {
+		pr_perror("Can't chdir to work_dir");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Choose the log destination and level from the RPC request, then call log_init() on it. */
+static int setup_logging_from_req(CriuOpts *req, bool output_changed_by_rpc_conf)
+{
+	if (req->log_file && !output_changed_by_rpc_conf) {
+		if (strchr(req->log_file, '/')) {
+			pr_err("No subdirs are allowed in log_file name\n"); /* no errno to report here */
+			return -1;
+		}
+		SET_CHAR_OPTS(output, req->log_file);
+	} else if (req->has_log_to_stderr && req->log_to_stderr && !output_changed_by_rpc_conf) {
+		xfree(opts.output);
+		opts.output = NULL; /* log_init(NULL) writes to stderr */
+	} else if (!opts.output) {
+		SET_CHAR_OPTS(output, DEFAULT_LOG_FILENAME);
+	}
+
+	opts.log_level = req->log_level;
+	log_set_loglevel(opts.log_level);
+	if (log_init(opts.output)) {
+		pr_perror("Can't initiate log");
+		return -1;
+	}
+	return 0;
+}
 
 static int setup_opts_from_req(int sk, CriuOpts *req)
 {
 	struct ucred ids;
 	struct stat st;
 	socklen_t ids_len = sizeof(struct ucred);
-	char images_dir_path[PATH_MAX];
-	char work_dir_path[PATH_MAX];
+	char images_dir_path[PATH_MAX] = "";
 	char status_fd[PATH_MAX];
 	bool output_changed_by_rpc_conf = false;
 	bool work_changed_by_rpc_conf = false;
@@ -260,6 +411,23 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
 		goto err;
 	}
 
+	/*
+	 * The options relevant in CHECK mode are: log_file, log_to_stderr, and log_level.
+	 * When logging to a file, we also need to resolve images_dir and work_dir.
+	 */
+	if (opts.mode == CR_CHECK) {
+		if (!req)
+			return 0; /* nothing to do */
+
+		/*
+		 * A log file is needed only if:
+		 *   - log_file is explicitly set, or
+		 *   - log_to_stderr is NOT requested (i.e., using DEFAULT_LOG_FILENAME)
+		 */
+		if (!req->log_file || (req->has_log_to_stderr && req->log_to_stderr))
+			return 0; /* no log file, don't require images_dir or work_dir */
+	}
+
 	if (fstat(sk, &st)) {
 		pr_perror("Can't get socket stat");
 		goto err;
@@ -268,149 +436,8 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
 	BUG_ON(st.st_ino == -1);
 	service_sk_ino = st.st_ino;
 
-	/*
-	 * Evaluate an additional configuration file if specified.
-	 * This needs to happen twice, because it is needed early to detect
-	 * things like work_dir, imgs_dir and logfile. The second parsing
-	 * of the optional RPC configuration file happens at the end and
-	 * overwrites all options set via RPC.
-	 */
-	if (req->config_file) {
-		char *tmp_output = opts.output;
-		char *tmp_work = opts.work_dir;
-		char *tmp_imgs = opts.imgs_dir;
-
-		opts.output = NULL;
-		opts.work_dir = NULL;
-		opts.imgs_dir = NULL;
-
-		rpc_cfg_file = req->config_file;
-		i = parse_options(0, NULL, &dummy, &dummy, PARSING_RPC_CONF);
-		if (i) {
-			xfree(tmp_output);
-			xfree(tmp_work);
-			xfree(tmp_imgs);
-			goto err;
-		}
-		/* If this is non-NULL, the RPC configuration file had a value, use it.*/
-		if (opts.output)
-			output_changed_by_rpc_conf = true;
-		/* If this is NULL, use the old value if it was set. */
-		if (!opts.output && tmp_output) {
-			opts.output = tmp_output;
-			tmp_output = NULL;
-		}
-
-		if (opts.work_dir)
-			work_changed_by_rpc_conf = true;
-		if (!opts.work_dir && tmp_work) {
-			opts.work_dir = tmp_work;
-			tmp_work = NULL;
-		}
-
-		if (opts.imgs_dir)
-			imgs_changed_by_rpc_conf = true;
-		/*
-		 * As the images directory is a required RPC setting, it is not
-		 * necessary to use the value from other configuration files.
-		 * Either it is set in the RPC configuration file or it is set
-		 * via RPC.
-		 */
-		xfree(tmp_output);
-		xfree(tmp_work);
-		xfree(tmp_imgs);
-	}
-
-	/*
-	 * open images_dir - images_dir_fd is a required RPC parameter
-	 *
-	 * This assumes that if opts.imgs_dir is set we have a value
-	 * from the configuration file parser. The test to see that
-	 * imgs_changed_by_rpc_conf is true is used to make sure the value
-	 * is from the RPC configuration file.
-	 * The idea is that only the RPC configuration file is able to
-	 * overwrite RPC settings:
-	 *  * apply_config(global_conf)
-	 *  * apply_config(user_conf)
-	 *  * apply_config(environment variable)
-	 *  * apply_rpc_options()
-	 *  * apply_config(rpc_conf)
-	 */
-	if (imgs_changed_by_rpc_conf)
-		strncpy(images_dir_path, opts.imgs_dir, PATH_MAX - 1);
-	else
-		sprintf(images_dir_path, "/proc/%d/fd/%d", ids.pid, req->images_dir_fd);
-
-	if (req->parent_img)
-		SET_CHAR_OPTS(img_parent, req->parent_img);
-
-	/*
-	 * Image streaming is not supported with CRIU's service feature as
-	 * the streamer must be started for each dump/restore operation.
-	 * It is unclear how to do that with RPC, so we punt for now.
-	 * This explains why we provide the argument mode=-1 instead of
-	 * O_RSTR or O_DUMP.
-	 */
-	if (open_image_dir(images_dir_path, -1) < 0) {
-		pr_perror("Can't open images directory");
-		goto err;
-	}
-
-	/* get full path to images_dir to use in process title */
-	if (readlink(images_dir_path, images_dir, PATH_MAX) == -1) {
-		pr_perror("Can't readlink %s", images_dir_path);
-		goto err;
-	}
-
-	/* chdir to work dir */
-	if (work_changed_by_rpc_conf)
-		/* Use the value from the RPC configuration file first. */
-		strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1);
-	else if (req->has_work_dir_fd)
-		/* Use the value set via RPC. */
-		sprintf(work_dir_path, "/proc/%d/fd/%d", ids.pid, req->work_dir_fd);
-	else if (opts.work_dir)
-		/* Use the value from one of the other configuration files. */
-		strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1);
-	else
-		/* Use the images directory a work directory. */
-		strcpy(work_dir_path, images_dir_path);
-
-	if (chdir(work_dir_path)) {
-		pr_perror("Can't chdir to work_dir");
-		goto err;
-	}
-
-	/* initiate log file in work dir */
-	if (req->log_file && !output_changed_by_rpc_conf) {
-		/*
-		 * If RPC sets a log file and if there nothing from the
-		 * RPC configuration file, use the RPC value.
-		 */
-		if (strchr(req->log_file, '/')) {
-			pr_perror("No subdirs are allowed in log_file name");
-			goto err;
-		}
-
-		SET_CHAR_OPTS(output, req->log_file);
-	} else if (!opts.output) {
-		SET_CHAR_OPTS(output, DEFAULT_LOG_FILENAME);
-	}
-
-	/* This is needed later to correctly set the log_level */
-	opts.log_level = req->log_level;
-	log_set_loglevel(req->log_level);
-	if (log_init(opts.output) == -1) {
-		pr_perror("Can't initiate log");
-		goto err;
-	}
-
-	if (req->config_file) {
-		pr_debug("Would overwrite RPC settings with values from %s\n", req->config_file);
-	}
-
-	if (kerndat_init())
-		return 1;
+	if (req->has_unprivileged)
+		opts.unprivileged = req->unprivileged;
 
 	if (log_keep_err()) {
 		pr_perror("Can't tune log");
@@ -421,6 +448,9 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
 	if (req->has_leave_running && req->leave_running)
 		opts.final_state = TASK_ALIVE;
 
+	if (req->has_leave_stopped && req->leave_stopped)
+		opts.final_state = TASK_STOPPED;
+
 	if (!req->has_pid) {
 		req->has_pid = true;
 		req->pid = ids.pid;
@@ -464,6 +494,9 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
 	if (req->has_shell_job)
 		opts.shell_job = req->shell_job;
 
+	if (req->has_skip_file_rwx_check)
+		opts.skip_file_rwx_check = req->skip_file_rwx_check;
+
 	if (req->has_file_locks)
 		opts.handle_file_locks = req->file_locks;
 
@@ -510,6 +543,9 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
 		case CRIU_NETWORK_LOCK_METHOD__NFTABLES:
 			opts.network_lock_method = NETWORK_LOCK_NFTABLES;
 			break;
+		case CRIU_NETWORK_LOCK_METHOD__SKIP:
+			opts.network_lock_method = NETWORK_LOCK_SKIP;
+			break;
 		default:
 			goto err;
 		}
@@ -685,14 +721,6 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
 		if (req->empty_ns & ~(CLONE_NEWNET))
 			goto err;
 	}
-
-	if (req->n_irmap_scan_paths) {
-		for (i = 0; i < req->n_irmap_scan_paths; i++) {
-			if (irmap_scan_path_add(req->irmap_scan_paths[i]))
-				goto err;
-		}
-	}
-
 	if (req->has_status_fd) {
 		pr_warn("status_fd is obsoleted; use status-ready notification instead\n");
 
@@ -704,25 +732,95 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
 		}
 	}
 
-	if (req->has_pidfd_store_sk && init_pidfd_store_sk(ids.pid, req->pidfd_store_sk))
-		goto err;
-
 	if (req->orphan_pts_master)
 		opts.orphan_pts_master = true;
 
-	/* Evaluate additional configuration file a second time to overwrite
-	 * all RPC settings. */
+	if (req->has_display_stats)
+		opts.display_stats = req->display_stats;
+
+	/* Evaluate additional configuration file (e.g., runc.conf) to overwrite all RPC settings. */
 	if (req->config_file) {
+		char *tmp_output = opts.output;
+		char *tmp_work = opts.work_dir;
+
+		opts.output = NULL;
+		opts.work_dir = NULL;
+
+		/*
+		 * As the images directory is a required RPC setting, it is not
+		 * necessary to use the value from other configuration files.
+		 * Either it is set in the RPC configuration file or it is set
+		 * via RPC.
+		 */
+		xfree(opts.imgs_dir);
+		opts.imgs_dir = NULL;
+
+		pr_debug("Would overwrite RPC settings with values from %s\n", req->config_file);
+
 		rpc_cfg_file = req->config_file;
 		i = parse_options(0, NULL, &dummy, &dummy, PARSING_RPC_CONF);
-		if (i)
+		if (i) {
+			xfree(tmp_output);
+			xfree(tmp_work);
 			goto err;
+		}
+
+		/* If opts.{output,work_dir} is non-NULL, the RPC configuration file had a value; use it. */
+		/* If opts.{output,work_dir} is NULL, use the old value if it was set. */
+		if (opts.output) {
+			output_changed_by_rpc_conf = true;
+		} else {
+			opts.output = tmp_output;
+			tmp_output = NULL;
+		}
+
+		if (opts.work_dir) {
+			work_changed_by_rpc_conf = true;
+		} else {
+			opts.work_dir = tmp_work;
+			tmp_work = NULL;
+		}
+
+		if (opts.imgs_dir)
+			imgs_changed_by_rpc_conf = true;
+
+		xfree(tmp_output);
+		xfree(tmp_work);
 	}
 
+	if (resolve_images_dir_path(images_dir_path, imgs_changed_by_rpc_conf, req, ids.pid) < 0)
+		goto err;
+
+	if (req->parent_img)
+		SET_CHAR_OPTS(img_parent, req->parent_img);
+
+	if (setup_images_and_workdir(images_dir_path, work_changed_by_rpc_conf, req, ids.pid))
+		goto err;
+
+	if (req->n_irmap_scan_paths) {
+		for (i = 0; i < req->n_irmap_scan_paths; i++) {
+			if (irmap_scan_path_add(req->irmap_scan_paths[i]))
+				goto err;
+		}
+	}
+
+	/* initiate log file in work dir */
+	if (setup_logging_from_req(req, output_changed_by_rpc_conf))
+		goto err;
+
+	if (check_caps())
+		goto err;
+
+	if (kerndat_init())
+		goto err;
+
+	/* init_pidfd_store_sk must be called after kerndat_init. */
+	if (req->has_pidfd_store_sk && init_pidfd_store_sk(ids.pid, req->pidfd_store_sk))
+		goto err;
+
 	if (req->mntns_compat_mode)
 		opts.mntns_compat_mode = true;
 
-	log_set_loglevel(opts.log_level);
 	if (check_options())
 		goto err;
 
@@ -742,7 +840,7 @@ static int dump_using_req(int sk, CriuOpts *req)
 	if (setup_opts_from_req(sk, req))
 		goto exit;
 
-	setproctitle("dump --rpc -t %d -D %s", req->pid, images_dir);
+	__setproctitle("dump --rpc -t %d", req->pid);
 
 	if (init_pidfd_store_hash())
 		goto pidfd_store_err;
@@ -785,7 +883,7 @@ static int restore_using_req(int sk, CriuOpts *req)
 	if (setup_opts_from_req(sk, req))
 		goto exit;
 
-	setproctitle("restore --rpc -D %s", images_dir);
+	__setproctitle("restore --rpc");
 
 	if (cr_restore_tasks())
 		goto exit;
@@ -824,6 +922,11 @@ static int check(int sk, CriuOpts *req)
 
 	resp.type = CRIU_REQ_TYPE__CHECK;
 
+	if (log_keep_err()) {
+		pr_perror("Can't tune log");
+		goto out;
+	}
+
 	pid = fork();
 	if (pid < 0) {
 		pr_perror("Can't fork");
@@ -831,7 +934,7 @@ static int check(int sk, CriuOpts *req)
 	}
 
 	if (pid == 0) {
-		setproctitle("check --rpc");
+		__setproctitle("check --rpc");
 
 		opts.mode = CR_CHECK;
 		if (setup_opts_from_req(sk, req))
@@ -848,6 +951,7 @@ static int check(int sk, CriuOpts *req)
 
 	resp.success = true;
 out:
+	set_resp_err(&resp);
 	return send_criu_msg(sk, &resp);
 }
 
@@ -856,6 +960,11 @@ static int pre_dump_using_req(int sk, CriuOpts *req, bool single)
 	int pid, status;
 	bool success = false;
 
+	if (log_keep_err()) {
+		pr_perror("Can't tune log");
+		goto out;
+	}
+
 	pid = fork();
 	if (pid < 0) {
 		pr_perror("Can't fork");
@@ -869,7 +978,7 @@ static int pre_dump_using_req(int sk, CriuOpts *req, bool single)
 		if (setup_opts_from_req(sk, req))
 			goto cout;
 
-		setproctitle("pre-dump --rpc -t %d -D %s", req->pid, images_dir);
+		__setproctitle("pre-dump --rpc -t %d", req->pid);
 
 		if (init_pidfd_store_hash())
 			goto pidfd_store_err;
@@ -934,6 +1043,11 @@ static int start_page_server_req(int sk, CriuOpts *req, bool daemon_mode)
 	CriuPageServerInfo ps = CRIU_PAGE_SERVER_INFO__INIT;
 	struct ps_info info;
 
+	if (log_keep_err()) {
+		pr_perror("Can't tune log");
+		goto out;
+	}
+
 	if (pipe(start_pipe)) {
 		pr_perror("No start pipe");
 		goto out;
@@ -947,7 +1061,7 @@ static int start_page_server_req(int sk, CriuOpts *req, bool daemon_mode)
 		if (setup_opts_from_req(sk, req))
 			goto out_ch;
 
-		setproctitle("page-server --rpc --address %s --port %hu", opts.addr, opts.port);
+		__setproctitle("page-server --rpc --address %s --port %hu", opts.addr, opts.port);
 
 		pr_debug("Starting page server\n");
 
@@ -1007,6 +1121,7 @@ static int start_page_server_req(int sk, CriuOpts *req, bool daemon_mode)
 out:
 	resp.type = CRIU_REQ_TYPE__PAGE_SERVER;
 	resp.success = success;
+	set_resp_err(&resp);
 	return send_criu_msg(sk, &resp);
 }
 
@@ -1107,7 +1222,7 @@ static int handle_feature_check(int sk, CriuReq *msg)
 		if (kerndat_init())
 			exit(1);
 
-		setproctitle("feature-check --rpc");
+		__setproctitle("feature-check --rpc");
 
 		if ((msg->features->has_mem_track == 1) && (msg->features->mem_track == true))
 			feat.mem_track = kdat.has_dirty_track;
@@ -1181,6 +1296,11 @@ static int handle_cpuinfo(int sk, CriuReq *msg)
 	bool success = false;
 	int pid, status;
 
+	if (log_keep_err()) {
+		pr_perror("Can't tune log");
+		goto out;
+	}
+
 	pid = fork();
 	if (pid < 0) {
 		pr_perror("Can't fork");
@@ -1190,12 +1310,11 @@ static int handle_cpuinfo(int sk, CriuReq *msg)
 	if (pid == 0) {
 		int ret = 1;
 
-		opts.mode = CR_CPUINFO;
+		opts.mode = (msg->type == CRIU_REQ_TYPE__CPUINFO_DUMP) ? CR_CPUINFO_DUMP : CR_CPUINFO_CHECK;
 		if (setup_opts_from_req(sk, msg->opts))
 			goto cout;
 
-		setproctitle("cpuinfo %s --rpc -D %s", msg->type == CRIU_REQ_TYPE__CPUINFO_DUMP ? "dump" : "check",
-			     images_dir);
+		__setproctitle("cpuinfo %s --rpc", msg->type == CRIU_REQ_TYPE__CPUINFO_DUMP ? "dump" : "check");
 
 		if (msg->type == CRIU_REQ_TYPE__CPUINFO_DUMP)
 			ret = cpuinfo_dump();
@@ -1230,7 +1349,7 @@ static int handle_cpuinfo(int sk, CriuReq *msg)
 out:
 	resp.type = msg->type;
 	resp.success = success;
-
+	set_resp_err(&resp);
 	return send_criu_msg(sk, &resp);
 }
 
@@ -1239,6 +1358,14 @@ int cr_service_work(int sk)
 	int ret = -1;
 	CriuReq *msg = 0;
 
+	/*
+	 * util_init initializes criu_run_id and compel_run_id so that sockets
+	 * are generated with a unique name identifying the specific process
+	 * even in cases where multiple processes with the same pid in
+	 * different pid namespaces are sharing the same network namespace.
+	 */
+	util_init();
+
 more:
 	opts.mode = CR_SWRK;
 
diff --git a/criu/crtools.c b/criu/crtools.c
index cc8d9179f..4dc55a065 100644
--- a/criu/crtools.c
+++ b/criu/crtools.c
@@ -54,19 +54,17 @@ void flush_early_log_to_stderr(void)
 	flush_early_log_buffer(STDERR_FILENO);
 }
 
-static int image_dir_mode(char *argv[], int optind)
+static int image_dir_mode(void)
 {
 	switch (opts.mode) {
 	case CR_DUMP:
 		/* fallthrough */
+	case CR_CPUINFO_DUMP:
+		/* fallthrough */
 	case CR_PRE_DUMP:
 		return O_DUMP;
 	case CR_RESTORE:
 		return O_RSTR;
-	case CR_CPUINFO:
-		if (!strcmp(argv[optind + 1], "dump"))
-			return O_DUMP;
-		/* fallthrough */
 	default:
 		return -1;
 	}
@@ -76,36 +74,55 @@ static int image_dir_mode(char *argv[], int optind)
 	return -1;
 }
 
-static int parse_criu_mode(char *mode)
-{
-	if (!strcmp(mode, "dump"))
-		opts.mode = CR_DUMP;
-	else if (!strcmp(mode, "pre-dump"))
-		opts.mode = CR_PRE_DUMP;
-	else if (!strcmp(mode, "restore"))
-		opts.mode = CR_RESTORE;
-	else if (!strcmp(mode, "lazy-pages"))
-		opts.mode = CR_LAZY_PAGES;
-	else if (!strcmp(mode, "check"))
-		opts.mode = CR_CHECK;
-	else if (!strcmp(mode, "page-server"))
-		opts.mode = CR_PAGE_SERVER;
-	else if (!strcmp(mode, "service"))
-		opts.mode = CR_SERVICE;
-	else if (!strcmp(mode, "swrk"))
-		opts.mode = CR_SWRK;
-	else if (!strcmp(mode, "dedup"))
-		opts.mode = CR_DEDUP;
-	else if (!strcmp(mode, "cpuinfo"))
-		opts.mode = CR_CPUINFO;
-	else if (!strcmp(mode, "exec"))
-		opts.mode = CR_EXEC_DEPRECATED;
-	else if (!strcmp(mode, "show"))
-		opts.mode = CR_SHOW_DEPRECATED;
-	else
-		return -1;
+static const struct {
+	const char *cmd; /* command word compared against argv by parse_criu_mode() */
+	int mode; /* CR_* value stored into opts.mode when cmd matches */
+} commands[] = {
+	{ "dump", CR_DUMP },
+	{ "pre-dump", CR_PRE_DUMP },
+	{ "restore", CR_RESTORE },
+	{ "lazy-pages", CR_LAZY_PAGES },
+	{ "check", CR_CHECK },
+	{ "page-server", CR_PAGE_SERVER },
+	{ "service", CR_SERVICE },
+	{ "swrk", CR_SWRK },
+	{ "dedup", CR_DEDUP },
+	{ "exec", CR_EXEC_DEPRECATED },
+	{ "show", CR_SHOW_DEPRECATED },
+};
 
-	return 0;
+/* Set opts.mode from argv[*optind] ("cpuinfo" also consumes its action word); 0 on success, -1 on error. */
+static int parse_criu_mode(int argc, char **argv, int *optind)
+{
+	char *cmd = argv[*optind];
+	char *action = (argc - *optind > 1) ? argv[*optind + 1] : NULL;
+	int n;
+
+	for (n = 0; n < ARRAY_SIZE(commands); n++) {
+		if (!strcmp(cmd, commands[n].cmd)) {
+			opts.mode = commands[n].mode;
+			return 0;
+		}
+	}
+	/* Not a single-word command: only the "cpuinfo <action>" form is left. */
+	if (strcmp(cmd, "cpuinfo")) {
+		pr_err("unknown command: %s\n", cmd);
+		return -1;
+	}
+	if (!action) {
+		pr_err("cpuinfo requires an action: dump or check\n");
+		return -1;
+	}
+	if (!strcmp(action, "dump")) {
+		opts.mode = CR_CPUINFO_DUMP;
+	} else if (!strcmp(action, "check")) {
+		opts.mode = CR_CPUINFO_CHECK;
+	} else {
+		pr_err("unknown cpuinfo sub-command: %s\n", action);
+		return -1;
+	}
+	(*optind)++; /* the action word has been consumed */
+	return 0;
 }
 
 int main(int argc, char *argv[], char *envp[])
@@ -115,6 +132,7 @@ int main(int argc, char *argv[], char *envp[])
 	bool has_exec_cmd = false;
 	bool has_sub_command;
 	int state = PARSING_GLOBAL_CONF;
+	char *cmd;
 
 	BUILD_BUG_ON(CTL_32 != SYSCTL_TYPE__CTL_32);
 	BUILD_BUG_ON(__CTL_STR != SYSCTL_TYPE__CTL_STR);
@@ -127,7 +145,7 @@ int main(int argc, char *argv[], char *envp[])
 	}
 
 	cr_pb_init();
-	setproctitle_init(argc, argv, envp);
+	__setproctitle_init(argc, argv, envp);
 
 	if (argc < 2)
 		goto usage;
@@ -165,11 +183,18 @@ int main(int argc, char *argv[], char *envp[])
 		return 1;
 	}
 
-	if (parse_criu_mode(argv[optind])) {
-		pr_err("unknown command: %s\n", argv[optind]);
+	cmd = argv[optind];
+	ret = parse_criu_mode(argc, argv, &optind);
+	if (ret)
 		goto usage;
-	}
 
+	/*
+	 * util_init initializes criu_run_id and compel_run_id so that sockets
+	 * are generated with a unique name identifying the specific process
+	 * even in cases where multiple processes with the same pid in
+	 * different pid namespaces are sharing the same network namespace.
+	 */
+	util_init();
 	if (opts.mode == CR_SWRK) {
 		if (argc != optind + 2) {
 			fprintf(stderr, "Usage: criu swrk <fd>\n");
@@ -185,6 +210,9 @@ int main(int argc, char *argv[], char *envp[])
 		return cr_service_work(atoi(argv[optind + 1]));
 	}
 
+	if (check_caps())
+		return 1;
+
 	if (opts.imgs_dir == NULL)
 		SET_CHAR_OPTS(imgs_dir, ".");
 
@@ -214,25 +242,19 @@ int main(int argc, char *argv[], char *envp[])
 			return 1;
 		memcpy(opts.exec_cmd, &argv[optind + 1], (argc - optind - 1) * sizeof(char *));
 		opts.exec_cmd[argc - optind - 1] = NULL;
-	} else {
-		/* No subcommands except for cpuinfo and restore --exec-cmd */
-		if (opts.mode != CR_CPUINFO && has_sub_command) {
-			pr_err("excessive parameter%s for command %s\n", (argc - optind) > 2 ? "s" : "", argv[optind]);
-			goto usage;
-		} else if (opts.mode == CR_CPUINFO && !has_sub_command) {
-			pr_err("cpuinfo requires an action: dump or check\n");
-			goto usage;
-		}
+	} else if (has_sub_command) {
+		pr_err("excessive parameter%s for command %s\n", (argc - optind) > 2 ? "s" : "", cmd);
+		goto usage;
 	}
 
-	if (opts.stream && image_dir_mode(argv, optind) == -1) {
-		pr_err("--stream cannot be used with the %s command\n", argv[optind]);
+	if (opts.stream && image_dir_mode() == -1) {
+		pr_err("--stream cannot be used with the %s command\n", cmd);
 		goto usage;
 	}
 
 	/* We must not open imgs dir, if service is called */
 	if (opts.mode != CR_SERVICE) {
-		ret = open_image_dir(opts.imgs_dir, image_dir_mode(argv, optind));
+		ret = open_image_dir(opts.imgs_dir, image_dir_mode());
 		if (ret < 0) {
 			pr_err("Couldn't open image dir %s\n", opts.imgs_dir);
 			return 1;
@@ -251,8 +273,6 @@ int main(int argc, char *argv[], char *envp[])
 		return 1;
 	}
 
-	util_init();
-
 	if (log_init(opts.output))
 		return 1;
 
@@ -279,14 +299,13 @@ int main(int argc, char *argv[], char *envp[])
 	if (opts.img_parent)
 		pr_info("Will do snapshot from %s\n", opts.img_parent);
 
-	if (opts.mode == CR_DUMP) {
+	switch (opts.mode) {
+	case CR_DUMP:
 		if (!opts.tree_id)
 			goto opt_pid_missing;
 
 		return cr_dump_tasks(opts.tree_id);
-	}
-
-	if (opts.mode == CR_PRE_DUMP) {
+	case CR_PRE_DUMP:
 		if (!opts.tree_id)
 			goto opt_pid_missing;
 
@@ -296,9 +315,7 @@ int main(int argc, char *argv[], char *envp[])
 		}
 
 		return cr_pre_dump_tasks(opts.tree_id) != 0;
-	}
-
-	if (opts.mode == CR_RESTORE) {
+	case CR_RESTORE:
 		if (opts.tree_id)
 			pr_warn("Using -t with criu restore is obsoleted\n");
 
@@ -311,46 +328,41 @@ int main(int argc, char *argv[], char *envp[])
 		}
 
 		return ret != 0;
-	}
 
-	if (opts.mode == CR_LAZY_PAGES)
+	case CR_LAZY_PAGES:
 		return cr_lazy_pages(opts.daemon_mode) != 0;
 
-	if (opts.mode == CR_CHECK)
+	case CR_CHECK:
 		return cr_check() != 0;
 
-	if (opts.mode == CR_PAGE_SERVER)
+	case CR_PAGE_SERVER:
 		return cr_page_server(opts.daemon_mode, false, -1) != 0;
 
-	if (opts.mode == CR_SERVICE)
+	case CR_SERVICE:
 		return cr_service(opts.daemon_mode);
 
-	if (opts.mode == CR_DEDUP)
+	case CR_DEDUP:
 		return cr_dedup() != 0;
 
-	if (opts.mode == CR_CPUINFO) {
-		if (!argv[optind + 1]) {
-			pr_err("cpuinfo requires an action: dump or check\n");
-			goto usage;
-		}
-		if (!strcmp(argv[optind + 1], "dump"))
-			return cpuinfo_dump();
-		else if (!strcmp(argv[optind + 1], "check"))
-			return cpuinfo_check();
-	}
+	case CR_CPUINFO_DUMP:
+		return cpuinfo_dump();
 
-	if (opts.mode == CR_EXEC_DEPRECATED) {
+	case CR_CPUINFO_CHECK:
+		return cpuinfo_check();
+
+	case CR_EXEC_DEPRECATED:
 		pr_err("The \"exec\" action is deprecated by the Compel library.\n");
 		return -1;
-	}
 
-	if (opts.mode == CR_SHOW_DEPRECATED) {
+	case CR_SHOW_DEPRECATED:
 		pr_err("The \"show\" action is deprecated by the CRIT utility.\n");
 		pr_err("To view an image use the \"crit decode -i $name --pretty\" command.\n");
 		return -1;
-	}
 
-	pr_err("unknown command: %s\n", argv[optind]);
+	case CR_UNSET:
+	default:
+		pr_err("unknown command: %s\n", cmd);
+	}
 usage:
 	pr_msg("\n"
 	       "Usage:\n"
@@ -411,9 +423,10 @@ usage:
 	       "  --mntns-compat-mode   Use mount engine in compatibility mode. By default criu\n"
 	       "                        tries to use mount-v2 mode with more reliable algorithm\n"
 	       "                        based on MOVE_MOUNT_SET_GROUP kernel feature\n"
-	       "  --network-lock METHOD\n"
-	       "                      network locking/unlocking method; argument\n"
-	       "                      can be 'nftables' or 'iptables' (default).\n"
+	       "  --network-lock METHOD network locking/unlocking method; argument\n"
+	       "                        can be 'nftables' or 'iptables' (default).\n"
+	       "  --unprivileged        accept limitations when running as non-root\n"
+	       "  --allow-uprobes       allow dump/restore with uprobes vma\n"
 	       "\n"
 	       "* External resources support:\n"
 	       "  --external RES        dump objects from this list as external resources:\n"
@@ -441,6 +454,7 @@ usage:
 	       "                        is inaccessible\n"
 	       "  --link-remap          allow one to link unlinked files back when possible\n"
 	       "  --ghost-limit size    limit max size of deleted file contents inside image\n"
+	       "  --ghost-fiemap        enable dumping of deleted files using fiemap\n"
 	       "  --action-script FILE  add an external action script\n"
 	       "  -j|--" OPT_SHELL_JOB "        allow one to dump and restore shell jobs\n"
 	       "  -l|--" OPT_FILE_LOCKS "       handle file locks, for safety, only used for container\n"
@@ -489,8 +503,8 @@ usage:
 	       "                        Inherit file descriptors, treating fd NUM as being\n"
 	       "                        already opened via an existing RES, which can be:\n"
 	       "                            tty[rdev:dev]\n"
-	       "                            pipe[inode]\n"
-	       "                            socket[inode]\n"
+	       "                            pipe:[inode]\n"
+	       "                            socket:[inode]\n"
 	       "                            file[mnt_id:inode]\n"
 	       "                            /memfd:name\n"
 	       "                            path/to/file\n"
@@ -504,6 +518,9 @@ usage:
 	       "  --file-validation METHOD\n"
 	       "			pass the validation method to be used; argument\n"
 	       "			can be 'filesize' or 'buildid' (default).\n"
+	       "  --skip-file-rwx-check\n"
+	       "			Skip checking file permissions\n"
+	       "			(r/w/x for u/g/o) on restore.\n"
 	       "\n"
 	       "Check options:\n"
 	       "  Without options, \"criu check\" checks availability of absolutely required\n"
diff --git a/criu/fault-injection.c b/criu/fault-injection.c
index 83dc1fc8d..5dd9acf60 100644
--- a/criu/fault-injection.c
+++ b/criu/fault-injection.c
@@ -1,6 +1,7 @@
 #include <stdlib.h>
 #include "criu-log.h"
 #include "fault-injection.h"
+#include "seize.h"
 
 enum faults fi_strategy;
 
@@ -21,5 +22,13 @@ int fault_injection_init(void)
 	}
 
 	fi_strategy = start;
+
+	switch (fi_strategy) {
+	case FI_COMPEL_INTERRUPT_ONLY_MODE:
+		set_compel_interrupt_only_mode();
+		break;
+	default:
+		break;
+	};
 	return 0;
 }
diff --git a/criu/fdstore.c b/criu/fdstore.c
index 6a7f73a59..6ac639c55 100644
--- a/criu/fdstore.c
+++ b/criu/fdstore.c
@@ -13,6 +13,9 @@
 #include "rst-malloc.h"
 #include "log.h"
 #include "util.h"
+#include "cr_options.h"
+#include "util-caps.h"
+#include "sockets.h"
 
 /* clang-format off */
 static struct fdstore_desc {
@@ -49,15 +52,13 @@ int fdstore_init(void)
 		return -1;
 	}
 
-	if (setsockopt(sk, SOL_SOCKET, SO_SNDBUFFORCE, &buf[0], sizeof(buf[0])) < 0 ||
-	    setsockopt(sk, SOL_SOCKET, SO_RCVBUFFORCE, &buf[1], sizeof(buf[1])) < 0) {
-		pr_perror("Unable to set SO_SNDBUFFORCE/SO_RCVBUFFORCE");
+	if (sk_setbufs(sk, buf)) {
 		close(sk);
 		return -1;
 	}
 
 	addr.sun_family = AF_UNIX;
-	addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-fdstore-%" PRIx64 "-%" PRIx64, st.st_ino,
+	addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-fdstore-%" PRIx64 "-%s", st.st_ino,
 			   criu_run_id);
 	addrlen += sizeof(addr.sun_family);
 
diff --git a/criu/files-ext.c b/criu/files-ext.c
index 95ec8e37c..4cc99d921 100644
--- a/criu/files-ext.c
+++ b/criu/files-ext.c
@@ -45,10 +45,11 @@ static int open_fd(struct file_desc *d, int *new_fd)
 {
 	struct ext_file_info *xfi;
 	int fd;
+	bool retry_needed;
 
 	xfi = container_of(d, struct ext_file_info, d);
 
-	fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id);
+	fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id, &retry_needed);
 	if (fd < 0) {
 		pr_err("Unable to restore %#x\n", xfi->xfe->id);
 		return -1;
@@ -57,8 +58,11 @@ static int open_fd(struct file_desc *d, int *new_fd)
 	if (restore_fown(fd, xfi->xfe->fown))
 		return -1;
 
-	*new_fd = fd;
-	return 0;
+	if (!retry_needed)
+		*new_fd = fd;
+	else
+		*new_fd = -1;
+	return retry_needed;
 }
 
 static struct file_desc_ops ext_desc_ops = {
diff --git a/criu/files-reg.c b/criu/files-reg.c
index 0249063c2..66c0e6cda 100644
--- a/criu/files-reg.c
+++ b/criu/files-reg.c
@@ -11,8 +11,13 @@
 #include <sys/sendfile.h>
 #include <sched.h>
 #include <sys/capability.h>
-#include <sys/mount.h>
+#include <sys/ioctl.h>
 #include <elf.h>
+#include <linux/fiemap.h>
+#include <linux/fs.h>
+
+#include "tty.h"
+#include "stats.h"
 
 #ifndef SEEK_DATA
 #define SEEK_DATA 3
@@ -29,6 +34,8 @@
  * and checked.
  */
 #define BUILD_ID_MAP_SIZE 1048576
+#define ST_UNIT		  512
+#define EXTENT_MAX_COUNT  512
 
 #include "cr_options.h"
 #include "imgset.h"
@@ -79,7 +86,7 @@ static LIST_HEAD(ghost_files);
 /*
  * When opening remaps we first create a link on the remap
  * target, then open one, then unlink. In case the remap
- * source has more than one instance, these tree steps
+ * source has more than one instance, these three steps
  * should be serialized with each other.
  */
 static mutex_t *remap_open_lock;
@@ -218,6 +225,92 @@ static int copy_file_to_chunks(int fd, struct cr_img *img, size_t file_size)
 	return 0;
 }
 
+/* Clamp @fe to @file_size. Returns 1 if it holds no file data and must be skipped, else 0. */
+static int skip_outstanding(struct fiemap_extent *fe, size_t file_size)
+{
+	/* An extent starting at or past EOF would only yield an empty chunk. */
+	if (fe->fe_logical >= file_size)
+		return 1;
+	/* Trim the part past EOF; compare by subtraction so the sum cannot wrap. */
+	if (fe->fe_length > file_size - fe->fe_logical)
+		fe->fe_length = file_size - fe->fe_logical;
+	return 0;
+}
+
+/*
+ * Dump the data of @fd into @img as ghost chunks, one per allocated extent
+ * reported by FS_IOC_FIEMAP. Extents past @file_size are skipped and an
+ * extent crossing it is trimmed (see skip_outstanding()).
+ * Returns 0 on success, -EOPNOTSUPP when the ioctl is not supported so the
+ * caller can fall back to another method, and -1 on any other error.
+ */
+static int copy_file_to_chunks_fiemap(int fd, struct cr_img *img, size_t file_size)
+{
+	GhostChunkEntry ce = GHOST_CHUNK_ENTRY__INIT;
+	struct fiemap *fiemap_buf;
+	struct fiemap_extent *ext_buf, *fe;
+	int ext_buf_size, fie_buf_size;
+	off_t pos = 0;
+	unsigned int i;
+	int exit_code = 0;
+
+	ext_buf_size = EXTENT_MAX_COUNT * sizeof(struct fiemap_extent);
+	fie_buf_size = sizeof(struct fiemap) + ext_buf_size;
+
+	fiemap_buf = xzalloc(fie_buf_size);
+	if (!fiemap_buf) {
+		pr_perror("Out of memory when allocating fiemap");
+		return -1;
+	}
+
+	ext_buf = fiemap_buf->fm_extents;
+	fiemap_buf->fm_length = FIEMAP_MAX_OFFSET;
+	fiemap_buf->fm_flags |= FIEMAP_FLAG_SYNC;
+	fiemap_buf->fm_extent_count = EXTENT_MAX_COUNT;
+
+	do {
+		fiemap_buf->fm_start = pos;
+		memzero(ext_buf, ext_buf_size);
+		if (ioctl(fd, FS_IOC_FIEMAP, fiemap_buf) < 0) {
+			if (errno == EOPNOTSUPP) {
+				exit_code = -EOPNOTSUPP;
+			} else {
+				exit_code = -1;
+				pr_perror("fiemap ioctl() failed");
+			}
+			goto out;
+		}
+
+		for (i = 0; i < fiemap_buf->fm_mapped_extents; i++) {
+			fe = &ext_buf[i];
+			/*
+			 * Resume the next query right after this extent. This is
+			 * taken before any trimming and also for skipped extents,
+			 * so a full batch lying past EOF cannot stall the loop.
+			 */
+			pos = fe->fe_logical + fe->fe_length;
+			if (skip_outstanding(fe, file_size))
+				continue;
+
+			ce.len = fe->fe_length;
+			ce.off = fe->fe_logical;
+			if (pb_write_one(img, &ce, PB_GHOST_CHUNK) ||
+			    copy_chunk_from_file(fd, img_raw_fd(img), ce.off, ce.len)) {
+				exit_code = -1;
+				goto out;
+			}
+
+			/* There are no extents left, stop. */
+			if (fe->fe_flags & FIEMAP_EXTENT_LAST)
+				goto out;
+		}
+		/* A completely filled buffer means more extents may follow. */
+	} while (fiemap_buf->fm_mapped_extents == EXTENT_MAX_COUNT);
+out:
+	xfree(fiemap_buf);
+	return exit_code;
+}
+
 static int copy_chunk_to_file(int img, int fd, off_t off, size_t len)
 {
 	int ret;
@@ -314,46 +407,24 @@ static int mklnk_ghost(char *path, GhostFileEntry *gfe)
 static int ghost_apply_metadata(const char *path, GhostFileEntry *gfe)
 {
 	struct timeval tv[2];
-	int ret = -1;
 
-	if (S_ISLNK(gfe->mode)) {
-		if (lchown(path, gfe->uid, gfe->gid) < 0) {
-			pr_perror("Can't reset user/group on ghost %s", path);
-			goto err;
-		}
+	if (cr_fchpermat(AT_FDCWD, path, gfe->uid, gfe->gid, gfe->mode, AT_SYMLINK_NOFOLLOW) < 0)
+		return -1;
 
-		/*
-		 * We have no lchmod() function, and fchmod() will fail on
-		 * O_PATH | O_NOFOLLOW fd. Yes, we have fchmodat()
-		 * function and flag AT_SYMLINK_NOFOLLOW described in
-		 * man 2 fchmodat, but it is not currently implemented. %)
-		 */
-	} else {
-		if (chown(path, gfe->uid, gfe->gid) < 0) {
-			pr_perror("Can't reset user/group on ghost %s", path);
-			goto err;
-		}
+	if (!gfe->atim)
+		return 0;
 
-		if (chmod(path, gfe->mode)) {
-			pr_perror("Can't set perms %o on ghost %s", gfe->mode, path);
-			goto err;
-		}
+	tv[0].tv_sec = gfe->atim->tv_sec;
+	tv[0].tv_usec = gfe->atim->tv_usec;
+	tv[1].tv_sec = gfe->mtim->tv_sec;
+	tv[1].tv_usec = gfe->mtim->tv_usec;
+
+	if (lutimes(path, tv)) {
+		pr_perror("Can't set access and modification times on ghost %s", path);
+		return -1;
 	}
 
-	if (gfe->atim) {
-		tv[0].tv_sec = gfe->atim->tv_sec;
-		tv[0].tv_usec = gfe->atim->tv_usec;
-		tv[1].tv_sec = gfe->mtim->tv_sec;
-		tv[1].tv_usec = gfe->mtim->tv_usec;
-		if (lutimes(path, tv)) {
-			pr_perror("Can't set access and modification times on ghost %s", path);
-			goto err;
-		}
-	}
-
-	ret = 0;
-err:
-	return ret;
+	return 0;
 }
 
 static int create_ghost_dentry(char *path, GhostFileEntry *gfe, struct cr_img *img)
@@ -414,7 +485,7 @@ static int nomntns_create_ghost(struct ghost_file *gf, GhostFileEntry *gfe, stru
 	if (ghost_apply_metadata(path, gfe))
 		return -1;
 
-	strlcpy(gf->remap.rpath, path + 1, PATH_MAX);
+	__strlcpy(gf->remap.rpath, path + 1, PATH_MAX);
 	pr_debug("Remap rpath is %s\n", gf->remap.rpath);
 	return 0;
 }
@@ -545,7 +616,7 @@ static int open_remap_ghost(struct reg_file_info *rfi, RemapFilePathEntry *rpe)
 	gf->remap.rmnt_id = rfi->rfe->mnt_id;
 
 	if (S_ISDIR(gfe->mode))
-		strlcpy(gf->remap.rpath, rfi->path, PATH_MAX);
+		__strlcpy(gf->remap.rpath, rfi->path, PATH_MAX);
 	else
 		ghost_path(gf->remap.rpath, PATH_MAX, rfi, rpe);
 
@@ -910,10 +981,20 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de
 			goto err_out;
 		}
 
-		if (gfe.chunks)
-			ret = copy_file_to_chunks(fd, img, st->st_size);
-		else
+		if (gfe.chunks) {
+			if (opts.ghost_fiemap) {
+				ret = copy_file_to_chunks_fiemap(fd, img, st->st_size);
+				if (ret == -EOPNOTSUPP) {
+					pr_debug("file system don't support fiemap\n");
+					ret = copy_file_to_chunks(fd, img, st->st_size);
+				}
+			} else {
+				ret = copy_file_to_chunks(fd, img, st->st_size);
+			}
+		} else {
 			ret = copy_file(fd, img_raw_fd(img), st->st_size);
+		}
+
 		close(fd);
 		if (ret)
 			goto err_out;
@@ -946,8 +1027,8 @@ static int dump_ghost_remap(char *path, const struct stat *st, int lfd, u32 id,
 
 	pr_info("Dumping ghost file for fd %d id %#x\n", lfd, id);
 
-	if (st->st_size > opts.ghost_limit) {
-		pr_err("Can't dump ghost file %s of %" PRIu64 " size, increase limit\n", path, st->st_size);
+	if (st->st_blocks * ST_UNIT > opts.ghost_limit) {
+		pr_err("Can't dump ghost file %s of %" PRIu64 " size, increase limit\n", path, st->st_blocks * ST_UNIT);
 		return -1;
 	}
 
@@ -1035,7 +1116,6 @@ static int create_link_remap(char *path, int len, int lfd, u32 *idp, struct ns_i
 	RegFileEntry rfe = REG_FILE_ENTRY__INIT;
 	FownEntry fwn = FOWN_ENTRY__INIT;
 	int mntns_root;
-	int ret;
 	const struct stat *ost = &parms->stat;
 
 	if (!opts.link_remap_ok) {
@@ -1070,23 +1150,22 @@ static int create_link_remap(char *path, int len, int lfd, u32 *idp, struct ns_i
 	rfe.name = link_name + 1;
 
 	/* Any 'unique' name works here actually. Remap works by reg-file ids. */
-	snprintf(tmp + 1, sizeof(link_name) - (size_t)(tmp - link_name - 1), "link_remap.%d", rfe.id);
+	snprintf(tmp + 1, sizeof(link_name) - (size_t)(tmp - link_name) - 1, "link_remap.%d", rfe.id);
 
 	mntns_root = mntns_get_root_fd(nsid);
 
-again:
-	ret = linkat_hard(lfd, "", mntns_root, link_name, ost->st_uid, ost->st_gid, AT_EMPTY_PATH);
-	if (ret < 0 && errno == ENOENT) {
+	while (linkat_hard(lfd, "", mntns_root, link_name, ost->st_uid, ost->st_gid, AT_EMPTY_PATH) < 0) {
+		if (errno != ENOENT) {
+			pr_perror("Can't link remap to %s", path);
+			return -1;
+		}
+
 		/* Use grand parent, if parent directory does not exist. */
 		if (trim_last_parent(link_name) < 0) {
 			pr_err("trim failed: @%s@\n", link_name);
 			check_overlayfs_fallback(path, parms, fallback);
 			return -1;
 		}
-		goto again;
-	} else if (ret < 0) {
-		pr_perror("Can't link remap to %s", path);
-		return -1;
 	}
 
 	if (note_link_remap(link_name, nsid))
@@ -1571,22 +1650,10 @@ static int get_build_id_64(Elf64_Ehdr *file_header, unsigned char **build_id, co
  */
 static int get_build_id(const int fd, const struct stat *fd_status, unsigned char **build_id)
 {
-	char buf[SELFMAG + 1];
-	void *start_addr;
+	char *start_addr;
 	size_t mapped_size;
 	int ret = -1;
 
-	if (read(fd, buf, SELFMAG + 1) != SELFMAG + 1)
-		return -1;
-
-	/*
-	 * The first 4 bytes contain a magic number identifying the file as an
-	 * ELF file. They should contain the characters ‘\x7f’, ‘E’, ‘L’, and
-	 * ‘F’, respectively. These characters are together defined as ELFMAG.
-	 */
-	if (strncmp(buf, ELFMAG, SELFMAG))
-		return -1;
-
 	/*
 	 * If the build-id exists, then it will most likely be present in the
 	 * beginning of the file. Therefore at most only the first 1 MB of the
@@ -1594,16 +1661,25 @@ static int get_build_id(const int fd, const struct stat *fd_status, unsigned cha
 	 */
 	mapped_size = min_t(size_t, fd_status->st_size, BUILD_ID_MAP_SIZE);
 	start_addr = mmap(0, mapped_size, PROT_READ, MAP_PRIVATE | MAP_FILE, fd, 0);
-	if (start_addr == MAP_FAILED) {
+	if ((void*)start_addr == MAP_FAILED) {
 		pr_warn("Couldn't mmap file with fd %d\n", fd);
 		return -1;
 	}
 
-	if (buf[EI_CLASS] == ELFCLASS32)
-		ret = get_build_id_32(start_addr, build_id, fd, mapped_size);
-	if (buf[EI_CLASS] == ELFCLASS64)
-		ret = get_build_id_64(start_addr, build_id, fd, mapped_size);
+	/*
+	 * The first 4 bytes contain a magic number identifying the file as an
+	 * ELF file. They should contain the characters ‘\x7f’, ‘E’, ‘L’, and
+	 * ‘F’, respectively. These characters are together defined as ELFMAG.
+	 */
+	if (memcmp(start_addr, ELFMAG, SELFMAG))
+		goto out;
 
+	if (start_addr[EI_CLASS] == ELFCLASS32)
+		ret = get_build_id_32((Elf32_Ehdr *)start_addr, build_id, fd, mapped_size);
+	if (start_addr[EI_CLASS] == ELFCLASS64)
+		ret = get_build_id_64((Elf64_Ehdr *)start_addr, build_id, fd, mapped_size);
+
+out:
 	munmap(start_addr, mapped_size);
 	return ret;
 }
@@ -1688,6 +1764,7 @@ int dump_one_reg_file(int lfd, u32 id, const struct fd_parms *p)
 	int ret;
 	FileEntry fe = FILE_ENTRY__INIT;
 	RegFileEntry rfe = REG_FILE_ENTRY__INIT;
+	bool skip_for_shell_job = false;
 
 	if (!p->link) {
 		if (fill_fdlink(lfd, p, &_link))
@@ -1707,12 +1784,17 @@ int dump_one_reg_file(int lfd, u32 id, const struct fd_parms *p)
 
 	mi = lookup_mnt_id(p->mnt_id);
 	if (mi == NULL) {
-		pr_err("Can't lookup mount=%d for fd=%d path=%s\n", p->mnt_id, p->fd, link->name + 1);
-		return -1;
+		if (opts.shell_job && is_tty(p->stat.st_rdev, p->stat.st_dev)) {
+			skip_for_shell_job = true;
+		} else {
+			pr_err("Can't lookup mount=%d for fd=%d path=%s\n", p->mnt_id, p->fd, link->name + 1);
+			return -1;
+		}
 	}
 
-	if (mnt_is_overmounted(mi)) {
-		pr_err("Open files on overmounted mounts are not supported yet\n");
+	if (!skip_for_shell_job && mnt_is_overmounted(mi)) {
+		pr_err("Open files on overmounted mounts are not supported yet; mount=%d fd=%d path=%s\n",
+		       p->mnt_id, p->fd, link->name + 1);
 		return -1;
 	}
 
@@ -1731,7 +1813,7 @@ int dump_one_reg_file(int lfd, u32 id, const struct fd_parms *p)
 		return -1;
 	}
 
-	if (check_path_remap(link, p, lfd, id, mi->nsid))
+	if (!skip_for_shell_job && check_path_remap(link, p, lfd, id, mi->nsid))
 		return -1;
 	rfe.name = &link->name[1];
 ext:
@@ -2199,9 +2281,21 @@ ext:
 		if (!validate_file(tmp, &st, rfi))
 			goto err;
 
-		if (rfi->rfe->has_mode && (st.st_mode != rfi->rfe->mode)) {
-			pr_err("File %s has bad mode 0%o (expect 0%o)\n", rfi->path, (int)st.st_mode, rfi->rfe->mode);
-			goto err;
+		if (rfi->rfe->has_mode) {
+			mode_t curr_mode = st.st_mode;
+			mode_t saved_mode = rfi->rfe->mode;
+
+			if (opts.skip_file_rwx_check) {
+				curr_mode &= ~(S_IRWXU | S_IRWXG | S_IRWXO);
+				saved_mode &= ~(S_IRWXU | S_IRWXG | S_IRWXO);
+			}
+
+			if (curr_mode != saved_mode) {
+				pr_err("File %s has bad mode 0%o (expect 0%o)\n"
+				       "File r/w/x checks can be skipped with the --skip-file-rwx-check option\n",
+				       rfi->path, (int)curr_mode, saved_mode);
+				goto err;
+			}
 		}
 
 		/*
@@ -2409,9 +2503,10 @@ static int open_filemap(int pid, struct vma_area *vma)
 			 * using dup because dup returns a reference to the same struct file inside kernel, but we
 			 * cannot open a new FD.
 			 */
-			ret = dup(plugin_fd);
+			ret = plugin_fd;
 		} else if (vma->e->status & VMA_AREA_MEMFD) {
-			ret = memfd_open(vma->vmfd, &flags);
+			if (!inherited_fd(vma->vmfd, &ret))
+				ret = memfd_open(vma->vmfd, &flags, true);
 		} else {
 			ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags);
 		}
diff --git a/criu/files.c b/criu/files.c
index 8a2250e19..af4b8aeac 100644
--- a/criu/files.c
+++ b/criu/files.c
@@ -21,7 +21,7 @@
 #include "image.h"
 #include "common/list.h"
 #include "rst-malloc.h"
-#include "util-pie.h"
+#include "util-caps.h"
 #include "common/lock.h"
 #include "sockets.h"
 #include "pstree.h"
@@ -49,6 +49,7 @@
 #include "kerndat.h"
 #include "fdstore.h"
 #include "bpfmap.h"
+#include "pidfd.h"
 
 #include "protobuf.h"
 #include "util.h"
@@ -302,7 +303,7 @@ static int fixup_overlayfs(struct fd_parms *p, struct fd_link *link)
 		char buf[PATH_MAX];
 		int n;
 
-		strlcpy(buf, link->name, PATH_MAX);
+		__strlcpy(buf, link->name, PATH_MAX);
 		n = snprintf(link->name, PATH_MAX, "%s/%s", m->ns_mountpoint, buf + 2);
 		if (n >= PATH_MAX) {
 			pr_err("Not enough space to replace %s\n", buf);
@@ -544,6 +545,8 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts,
 			ops = &signalfd_dump_ops;
 		else if (is_timerfd_link(link))
 			ops = &timerfd_dump_ops;
+		else if (is_pidfd_link(link))
+			ops = &pidfd_dump_ops;
 #ifdef CONFIG_HAS_LIBBPF
 		else if (is_bpfmap_link(link))
 			ops = &bpfmap_dump_ops;
@@ -554,6 +557,11 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts,
 		return do_dump_gen_file(&p, lfd, ops, e);
 	}
 
+	if (p.fs_type == PID_FS_MAGIC) {
+		ops = &pidfd_dump_ops;
+		return do_dump_gen_file(&p, lfd, ops, e);
+	}
+
 	if (S_ISREG(p.stat.st_mode) || S_ISDIR(p.stat.st_mode) || S_ISLNK(p.stat.st_mode)) {
 		if (fill_fdlink(lfd, &p, &link))
 			return -1;
@@ -970,7 +978,7 @@ static int receive_fd(struct fdinfo_list_entry *fle);
 static void transport_name_gen(struct sockaddr_un *addr, int *len, int pid)
 {
 	addr->sun_family = AF_UNIX;
-	snprintf(addr->sun_path, UNIX_PATH_MAX, "x/crtools-fd-%d-%" PRIx64, pid, criu_run_id);
+	snprintf(addr->sun_path, UNIX_PATH_MAX, "x/crtools-fd-%d-%s", pid, criu_run_id);
 	*len = SUN_LEN(addr);
 	*addr->sun_path = '\0';
 }
@@ -1321,7 +1329,6 @@ int prepare_fds(struct pstree_item *me)
 		}
 	}
 
-	BUG_ON(current->pid->state == TASK_HELPER);
 	ret = open_fdinfos(me);
 
 	if (rsti(me)->fdt)
@@ -1346,10 +1353,35 @@ static int fchroot(int fd)
 	return chroot(".");
 }
 
+/* Nonzero if PROC_SELF "root" differs (dev/ino) from saved_root, 0 if same, -1 on error. */
+static int need_chroot(int saved_root)
+{
+	struct stat saved, cur;
+	int proc_self;
+
+	if (fstat(saved_root, &saved) == -1) {
+		pr_perror("Failed to stat saved root dir");
+		return -1;
+	}
+
+	proc_self = open_pid_proc(PROC_SELF);
+	if (proc_self < 0) {
+		pr_perror("Failed to open PROC_SELF");
+		return -1;
+	}
+
+	if (fstatat(proc_self, "root", &cur, 0) == -1) {
+		pr_perror("Failed to stat current root dir");
+		return -1;
+	}
+	return !(saved.st_ino == cur.st_ino && saved.st_dev == cur.st_dev);
+}
+
 int restore_fs(struct pstree_item *me)
 {
 	int dd_root = -1, dd_cwd = -1, ret, err = -1;
 	struct rst_info *ri = rsti(me);
+	bool do_chroot = true;
 
 	/*
 	 * First -- open both descriptors. We will not
@@ -1368,15 +1400,24 @@ int restore_fs(struct pstree_item *me)
 		goto out;
 	}
 
+	/*
+	 * In unprivileged mode chroot() may fail if we don't have
+	 * sufficient privileges, therefore only do it if the process
+	 * is actually chrooted.
+	 */
+	if (opts.unprivileged)
+		do_chroot = need_chroot(dd_root);
+
 	/*
 	 * Now do chroot/chdir. Chroot goes first as it calls chdir into
 	 * dd_root so we'd need to fix chdir after it anyway.
 	 */
-
-	ret = fchroot(dd_root);
-	if (ret < 0) {
-		pr_perror("Can't change root");
-		goto out;
+	if (do_chroot) {
+		ret = fchroot(dd_root);
+		if (ret < 0) {
+			pr_perror("Can't change root");
+			goto out;
+		}
 	}
 
 	ret = fchdir(dd_cwd);
@@ -1744,6 +1785,9 @@ static int collect_one_file(void *o, ProtobufCMessage *base, struct cr_img *i)
 	case FD_TYPES__MEMFD:
 		ret = collect_one_file_entry(fe, fe->memfd->id, &fe->memfd->base, &memfd_cinfo);
 		break;
+	case FD_TYPES__PIDFD:
+		ret = collect_one_file_entry(fe, fe->pidfd->id, &fe->pidfd->base, &pidfd_cinfo);
+		break;
 #ifdef CONFIG_HAS_LIBBPF
 	case FD_TYPES__BPFMAP:
 		ret = collect_one_file_entry(fe, fe->bpf->id, &fe->bpf->base, &bpfmap_cinfo);
@@ -1766,5 +1810,6 @@ int prepare_files(void)
 {
 	init_fdesc_hash();
 	init_sk_info_hash();
+	init_dead_pidfd_hash();
 	return collect_image(&files_cinfo);
 }
diff --git a/criu/filesystems.c b/criu/filesystems.c
index 890d5d06d..093e1c492 100644
--- a/criu/filesystems.c
+++ b/criu/filesystems.c
@@ -547,7 +547,8 @@ static int fusectl_dump(struct mount_info *pm)
 		}
 
 		for (it = mntinfo; it; it = it->next) {
-			if (it->fstype->code == FSTYPE__FUSE && id == kdev_minor(it->s_dev) && !it->external) {
+			if (it->fstype->code == FSTYPE__FUSE && id == kdev_minor(it->s_dev) &&
+			    !mnt_is_external_bind(it)) {
 				pr_err("%s is a fuse mount but not external\n", it->ns_mountpoint);
 				goto out;
 			}
diff --git a/criu/fsnotify.c b/criu/fsnotify.c
index 03711f0b2..8572dc2f3 100644
--- a/criu/fsnotify.c
+++ b/criu/fsnotify.c
@@ -183,7 +183,7 @@ static char *alloc_openable(unsigned int s_dev, unsigned long i_ino, FhEntry *f_
 				return path;
 			}
 		} else
-			pr_debug("\t\t\tnot openable as %s (%m)\n", __path);
+			pr_debug("\t\t\tnot openable as %s (%s)\n", __path, strerror(errno));
 	}
 
 err:
diff --git a/criu/hugetlb.c b/criu/hugetlb.c
index aa98662d8..866c4050f 100644
--- a/criu/hugetlb.c
+++ b/criu/hugetlb.c
@@ -35,6 +35,19 @@ int is_hugetlb_dev(dev_t dev, int *hugetlb_size_flag)
 	return 0;
 }
 
+int can_dump_with_memfd_hugetlb(dev_t dev, int *hugetlb_size_flag, const char *file_path, struct vma_area *vma)
+{
+	/* Both kdat.has_memfd_hugetlb and is_hugetlb_dev() must agree first. */
+	if (!kdat.has_memfd_hugetlb || !is_hugetlb_dev(dev, hugetlb_size_flag))
+		return 0;
+
+	/* An anonymous private hugetlb mapping is not dumped via memfd_hugetlb. */
+	if ((vma->e->flags & MAP_PRIVATE) && !strncmp(file_path, ANON_HUGEPAGE_PREFIX, ANON_HUGEPAGE_PREFIX_LEN))
+		return 0;
+
+	return 1;
+}
+
 unsigned long get_size_from_hugetlb_flag(int flag)
 {
 	int i;
diff --git a/criu/image-desc.c b/criu/image-desc.c
index d65d9c098..2d87c7381 100644
--- a/criu/image-desc.c
+++ b/criu/image-desc.c
@@ -107,6 +107,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
 	FD_ENTRY_F(BPFMAP_FILE,	"bpfmap-file", O_NOBUF),
 	FD_ENTRY_F(BPFMAP_DATA,	"bpfmap-data", O_NOBUF),
 	FD_ENTRY(APPARMOR,	"apparmor"),
+	FD_ENTRY(PIDFD,		"pidfd"),
 
 	[CR_FD_STATS] = {
 		.fmt	= "stats-%s",
diff --git a/criu/image.c b/criu/image.c
index 353de48e8..91101c3eb 100644
--- a/criu/image.c
+++ b/criu/image.c
@@ -25,6 +25,15 @@ bool img_common_magic = true;
 TaskKobjIdsEntry *root_ids;
 u32 root_cg_set;
 Lsmtype image_lsm;
+char dump_criu_run_id[RUN_ID_HASH_LENGTH];
+
+struct inventory_plugin {
+	struct list_head node; /* link in inventory_plugins_list */
+	char *name; /* xstrdup()ed plugin name, freed together with the entry */
+};
+
+struct list_head inventory_plugins_list = LIST_HEAD_INIT(inventory_plugins_list);
+static int n_inventory_plugins; /* list length; -1 when the image has no plugins_entry */
 
 int check_img_inventory(bool restore)
 {
@@ -86,6 +95,11 @@ int check_img_inventory(bool restore)
 		goto out_err;
 	}
 
+	if (restore && he->allow_uprobes && !opts.allow_uprobes) {
+		pr_err("Dumped with --" OPT_ALLOW_UPROBES ". Need to set it on restore as well.\n");
+		goto out_err;
+	}
+
 	if (restore) {
 		if (!he->has_network_lock_method) {
 			/*
@@ -99,6 +113,37 @@ int check_img_inventory(bool restore)
 		} else {
 			opts.network_lock_method = he->network_lock_method;
 		}
+
+		if (!he->plugins_entry) {
+			/* backwards compatibility: if the 'plugins_entry' field is missing,
+			 * all plugins should be enabled during restore.
+			 */
+			n_inventory_plugins = -1;
+		} else {
+			PluginsEntry *pe = he->plugins_entry;
+			for (int i = 0; i < pe->n_plugins; i++) {
+				if (add_inventory_plugin(pe->plugins[i]))
+					goto out_err;
+			}
+		}
+
+		/**
+		 * This contains the criu_run_id during dumping of the process.
+		 * For things like removing network locking (nftables) this
+		 * information is needed to identify the name of the network
+		 * locking table.
+		 */
+		if (he->dump_criu_run_id) {
+			strncpy(dump_criu_run_id, he->dump_criu_run_id, sizeof(dump_criu_run_id) - 1);
+			pr_info("Dump CRIU run id = %s\n", dump_criu_run_id);
+		} else {
+			/**
+			 * If restoring from an old image this is a marker
+			 * that no dump_criu_run_id exists.
+			 */
+			dump_criu_run_id[0] = NO_DUMP_CRIU_RUN_ID;
+		}
+
 	}
 
 	ret = 0;
@@ -110,8 +155,92 @@ out_close:
 	return ret;
 }
 
+/**
+ * Look up a plugin in the list built from the inventory image, comparing
+ * only the first n bytes of name. A matching entry is unlinked and freed.
+ * Returns true on a match, or when the image carried no plugin list at all.
+ */
+bool check_and_remove_inventory_plugin(const char *name, size_t n)
+{
+	struct inventory_plugin *entry, *next;
+
+	if (n_inventory_plugins == -1)
+		return true; /* backwards compatibility */
+
+	if (n_inventory_plugins <= 0)
+		return false;
+
+	list_for_each_entry_safe(entry, next, &inventory_plugins_list, node) {
+		if (strncmp(name, entry->name, n) != 0)
+			continue;
+		xfree(entry->name);
+		list_del(&entry->node);
+		xfree(entry);
+		n_inventory_plugins--;
+		return true;
+	}
+	return false;
+}
+
+/**
+ * During restore every loaded plugin is expected to have been removed
+ * from inventory_plugins_list. Entries still present are reported as
+ * missing, one error line each, and -1 is returned; otherwise 0.
+ */
+int check_inventory_plugins(void)
+{
+	struct inventory_plugin *missing;
+
+	/* Zero or -1 (no plugin list in the image): nothing left to report. */
+	if (n_inventory_plugins < 1)
+		return 0;
+
+	list_for_each_entry(missing, &inventory_plugins_list, node)
+		pr_err("Missing required plugin: %s\n", missing->name);
+
+	return -1;
+}
+
+/**
+ * Record a copy of a plugin name in inventory_plugins_list so that
+ * required plugins can be identified during restore.
+ * Returns 0 on success, -1 if memory allocation fails.
+ */
+int add_inventory_plugin(const char *name)
+{
+	struct inventory_plugin *entry = xmalloc(sizeof(*entry));
+
+	if (!entry)
+		return -1;
+
+	entry->name = xstrdup(name);
+	if (entry->name == NULL) {
+		xfree(entry);
+		return -1;
+	}
+
+	list_add(&entry->node, &inventory_plugins_list);
+	n_inventory_plugins++;
+	return 0;
+}
+
+/* Release every entry of inventory_plugins_list and reset the counter. */
+void free_inventory_plugins_list(void)
+{
+	struct inventory_plugin *entry, *next;
+
+	/* The safe iterator does nothing on an empty list, so no guard is needed. */
+	list_for_each_entry_safe(entry, next, &inventory_plugins_list, node) {
+		xfree(entry->name);
+		list_del(&entry->node);
+		xfree(entry);
+	}
+	n_inventory_plugins = 0;
+}
+
 int write_img_inventory(InventoryEntry *he)
 {
+	PluginsEntry pe = PLUGINS_ENTRY__INIT;
 	struct cr_img *img;
 	int ret;
 
@@ -121,8 +250,27 @@ int write_img_inventory(InventoryEntry *he)
 	if (!img)
 		return -1;
 
+	if (!list_empty(&inventory_plugins_list)) {
+		struct inventory_plugin *p;
+		int i = 0;
+
+		pe.n_plugins = n_inventory_plugins;
+		pe.plugins = xmalloc(n_inventory_plugins * sizeof(char *));
+		if (!pe.plugins) {
+			close_image(img); /* img is open here; do not leak it */
+			return -1;
+		}
+		/* Borrow the name pointers; the list entries still own the strings. */
+		list_for_each_entry(p, &inventory_plugins_list, node)
+			pe.plugins[i++] = p->name;
+	}
+	he->plugins_entry = &pe;
+
 	ret = pb_write_one(img, he, PB_INVENTORY);
 
+	free_inventory_plugins_list();
+	xfree(pe.plugins);
+
 	xfree(he->root_ids);
 	close_image(img);
 	if (ret < 0)
@@ -226,8 +374,9 @@ int prepare_inventory(InventoryEntry *he)
 	if (get_task_ids(&crt.i))
 		return -1;
 
-	he->has_root_cg_set = true;
-	if (dump_task_cgroup(NULL, &he->root_cg_set, NULL))
+	if (!opts.unprivileged)
+		he->has_root_cg_set = true;
+	if (dump_thread_cgroup(NULL, &he->root_cg_set, NULL, -1))
 		return -1;
 
 	he->root_ids = crt.i.ids;
@@ -242,6 +391,17 @@ int prepare_inventory(InventoryEntry *he)
 	he->has_network_lock_method = true;
 	he->network_lock_method = opts.network_lock_method;
 
+	/**
+	 * This contains the criu_run_id during dumping of the process.
+	 * For things like removing network locking (nftables) this
+	 * information is needed to identify the name of the network
+	 * locking table.
+	 */
+	he->dump_criu_run_id = xstrdup(criu_run_id);
+
+	if (!he->dump_criu_run_id)
+		return -1;
+
 	return 0;
 }
 
@@ -557,7 +717,7 @@ struct cr_img *img_from_fd(int fd)
  * This is used when opts.stream is enabled for picking the right streamer
  * socket name. `mode` is ignored when opts.stream is not enabled.
  */
-int open_image_dir(char *dir, int mode)
+int open_image_dir(const char *dir, int mode)
 {
 	int fd, ret;
 
diff --git a/criu/img-streamer.c b/criu/img-streamer.c
index 7e36eae01..305e6fae5 100644
--- a/criu/img-streamer.c
+++ b/criu/img-streamer.c
@@ -12,6 +12,7 @@
 #include "rst-malloc.h"
 #include "common/scm.h"
 #include "common/lock.h"
+#include "action-scripts.h"
 
 /*
  * We use different path names for the dump and restore sockets because:
@@ -49,10 +50,17 @@ static const char *socket_name_for_mode(int mode)
 int img_streamer_init(const char *image_dir, int mode)
 {
 	struct sockaddr_un addr;
+	int pre_stream_ret;
 	int sockfd;
 
 	img_streamer_mode = mode;
 
+	pre_stream_ret = run_scripts(ACT_PRE_STREAM);
+	if (pre_stream_ret != 0) {
+		pr_err("Pre-stream script failed with %d!\n", pre_stream_ret);
+		return -1;
+	}
+
 	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
 	if (sockfd < 0) {
 		pr_perror("Unable to instantiate UNIX socket");
diff --git a/criu/include/action-scripts.h b/criu/include/action-scripts.h
index c2e8850aa..6a331a32f 100644
--- a/criu/include/action-scripts.h
+++ b/criu/include/action-scripts.h
@@ -4,6 +4,7 @@
 #include "asm/int.h"
 
 enum script_actions {
+	ACT_PRE_STREAM,
 	ACT_PRE_DUMP,
 	ACT_POST_DUMP,
 	ACT_PRE_RESTORE,
@@ -16,6 +17,7 @@ enum script_actions {
 	ACT_PRE_RESUME,
 	ACT_ORPHAN_PTS_MASTER,
 	ACT_STATUS_READY,
+	ACT_QUERY_EXT_FILES,
 
 	ACT_MAX
 };
@@ -24,6 +26,8 @@ extern int add_script(char *path);
 extern int add_rpc_notify(int sk);
 extern int run_scripts(enum script_actions);
 extern int rpc_send_fd(enum script_actions, int fd);
+extern int rpc_query_external_files(void);
+extern int exec_rpc_query_external_files(char *name, int sk);
 extern int send_criu_rpc_script(enum script_actions act, char *name, int sk, int fd);
 
 #endif /* __CR_ACTION_SCRIPTS_H__ */
diff --git a/criu/include/aio.h b/criu/include/aio.h
index d1655739d..38e704020 100644
--- a/criu/include/aio.h
+++ b/criu/include/aio.h
@@ -1,7 +1,7 @@
 #ifndef __CR_AIO_H__
 #define __CR_AIO_H__
 
-#include <linux/aio_abi.h>
+#include "linux/aio_abi.h"
 #include "images/mm.pb-c.h"
 unsigned int aio_estimate_nr_reqs(unsigned int size);
 int dump_aio_ring(MmEntry *mme, struct vma_area *vma);
diff --git a/criu/include/cgroup-props.h b/criu/include/cgroup-props.h
index 11b677548..10a7061b8 100644
--- a/criu/include/cgroup-props.h
+++ b/criu/include/cgroup-props.h
@@ -10,6 +10,7 @@ typedef struct {
 } cgp_t;
 
 extern cgp_t cgp_global;
+extern cgp_t cgp_global_v2;
 extern const cgp_t *cgp_get_props(const char *name);
 extern bool cgp_should_skip_controller(const char *name);
 extern bool cgp_add_dump_controller(const char *name);
diff --git a/criu/include/cgroup.h b/criu/include/cgroup.h
index 2e9b8933c..dc264032e 100644
--- a/criu/include/cgroup.h
+++ b/criu/include/cgroup.h
@@ -7,9 +7,10 @@
 struct pstree_item;
 struct parasite_dump_cgroup_args;
 extern u32 root_cg_set;
-int dump_task_cgroup(struct pstree_item *, u32 *, struct parasite_dump_cgroup_args *args);
+int dump_thread_cgroup(const struct pstree_item *, u32 *, struct parasite_dump_cgroup_args *args, int id);
 int dump_cgroups(void);
-int prepare_task_cgroup(struct pstree_item *);
+int restore_task_cgroup(struct pstree_item *);
+int prepare_cgroup_namespace(struct pstree_item *);
 int prepare_cgroup(void);
 /* Restore things like cpu_limit in known cgroups. */
 int prepare_cgroup_properties(void);
@@ -60,6 +61,9 @@ struct cg_controller {
 
 	/* for cgroup list in cgroup.c */
 	struct list_head l;
+
+	/* controller is a threaded cgroup or not */
+	int is_threaded;
 };
 struct cg_controller *new_controller(const char *name);
 
@@ -87,9 +91,12 @@ struct cg_ctl {
  */
 struct list_head;
 struct parasite_dump_cgroup_args;
-extern int parse_task_cgroup(int pid, struct parasite_dump_cgroup_args *args, struct list_head *l, unsigned int *n);
+extern int parse_thread_cgroup(int pid, int tid, struct parasite_dump_cgroup_args *args, struct list_head *l,
+			       unsigned int *n);
 extern void put_ctls(struct list_head *);
 
 int collect_controllers(struct list_head *cgroups, unsigned int *n_cgroups);
 
+int stop_cgroupd(void);
+
 #endif /* __CR_CGROUP_H__ */
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
index bf1a762cc..8c5707b41 100644
--- a/criu/include/cr_options.h
+++ b/criu/include/cr_options.h
@@ -1,10 +1,12 @@
 #ifndef __CR_OPTIONS_H__
 #define __CR_OPTIONS_H__
 
-#include <sys/types.h>
 #include <stdbool.h>
+#include <sys/capability.h>
 #include "common/config.h"
 #include "common/list.h"
+#include "int.h"
+#include "image.h"
 
 /* Configuration and CLI parsing order defines */
 #define PARSING_GLOBAL_CONF  1
@@ -65,9 +67,18 @@ struct cg_root_opt {
 enum NETWORK_LOCK_METHOD {
 	NETWORK_LOCK_IPTABLES,
 	NETWORK_LOCK_NFTABLES,
+	NETWORK_LOCK_SKIP,
 };
 
+/**
+ * CRIU currently defaults to the iptables locking backend.
+ *
+ * It is, however, possible to change this by defining
+ * NETWORK_LOCK_DEFAULT to a different value on the command-line.
+ */
+#ifndef NETWORK_LOCK_DEFAULT
 #define NETWORK_LOCK_DEFAULT NETWORK_LOCK_IPTABLES
+#endif
 
 /*
  * Ghost file size we allow to carry by default.
@@ -93,6 +104,9 @@ enum FILE_VALIDATION_OPTIONS {
 /* This constant dictates which file validation method should be tried by default. */
 #define FILE_VALIDATION_DEFAULT FILE_VALIDATION_BUILD_ID
 
+/* This constant makes criu use fiemap to copy ghost files by default. */
+#define FIEMAP_DEFAULT 1
+
 struct irmap;
 
 struct irmap_path_opt {
@@ -111,7 +125,8 @@ enum criu_mode {
 	CR_SERVICE,
 	CR_SWRK,
 	CR_DEDUP,
-	CR_CPUINFO,
+	CR_CPUINFO_DUMP,
+	CR_CPUINFO_CHECK,
 	CR_EXEC_DEPRECATED,
 	CR_SHOW_DEPRECATED,
 };
@@ -165,6 +180,7 @@ struct cr_options {
 	int enable_external_masters;
 	bool aufs; /* auto-detected, not via cli */
 	bool overlayfs;
+	int ghost_fiemap;
 #ifdef CONFIG_BINFMT_MISC_VIRTUALIZED
 	bool has_binfmt_misc; /* auto-detected */
 #endif
@@ -179,6 +195,8 @@ struct cr_options {
 	bool lazy_pages;
 	char *work_dir;
 	int network_lock_method;
+	int skip_file_rwx_check;
+	int allow_uprobes;
 
 	/*
 	 * When we scheduler for removal some functionality we first
@@ -209,6 +227,26 @@ struct cr_options {
 	enum criu_mode mode;
 
 	int mntns_compat_mode;
+
+	/* Remember the program name passed to main() so we can use it in
+	 * error messages elsewhere.
+	 */
+	char *argv_0;
+	/*
+	 * This contains the eUID of the current CRIU user. It
+	 * will only be set to a non-zero value if CRIU has
+	 * the necessary capabilities to run as non root.
+	 * CAP_CHECKPOINT_RESTORE or CAP_SYS_ADMIN
+	 */
+	uid_t uid;
+	/* This contains the value from capget()->effective */
+	u32 cap_eff[_LINUX_CAPABILITY_U32S_3];
+	/*
+	 * If CRIU should be running as non-root with the help of
+	 * CAP_CHECKPOINT_RESTORE or CAP_SYS_ADMIN the user should
+	 * explicitly request it as it comes with many limitations.
+	 */
+	int unprivileged;
 };
 
 extern struct cr_options opts;
diff --git a/criu/include/criu-log.h b/criu/include/criu-log.h
index ae2f38489..9d52fbdb1 100644
--- a/criu/include/criu-log.h
+++ b/criu/include/criu-log.h
@@ -26,7 +26,6 @@
 extern int log_init(const char *output);
 extern void log_fini(void);
 extern int log_init_by_pid(pid_t pid);
-extern void log_closedir(void);
 extern int log_keep_err(void);
 extern char *log_first_err(void);
 
diff --git a/criu/include/criu-plugin.h b/criu/include/criu-plugin.h
index 886832eaa..c3bea1385 100644
--- a/criu/include/criu-plugin.h
+++ b/criu/include/criu-plugin.h
@@ -56,6 +56,16 @@ enum {
 
 	CR_PLUGIN_HOOK__RESUME_DEVICES_LATE = 9,
 
+	CR_PLUGIN_HOOK__PAUSE_DEVICES = 10,
+
+	CR_PLUGIN_HOOK__CHECKPOINT_DEVICES = 11,
+
+	CR_PLUGIN_HOOK__POST_FORKING = 12,
+
+	CR_PLUGIN_HOOK__RESTORE_INIT = 13,
+
+	CR_PLUGIN_HOOK__DUMP_DEVICES_LATE = 14,
+
 	CR_PLUGIN_HOOK__MAX
 };
 
@@ -64,7 +74,7 @@ enum {
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_UNIX_SK, int fd, int id);
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_UNIX_SK, int id);
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_FILE, int fd, int id);
-DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id);
+DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id, bool *retry_needed);
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_MOUNT, char *mountpoint, int id);
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_MOUNT, int id, char *mountpoint, char *old_root, int *is_file);
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_LINK, int index, int type, char *kind);
@@ -72,6 +82,11 @@ DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__HANDLE_DEVICE_VMA, int fd, const struct
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__UPDATE_VMA_MAP, const char *path, const uint64_t addr,
 			 const uint64_t old_pgoff, uint64_t *new_pgoff, int *plugin_fd);
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, int pid);
+DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__PAUSE_DEVICES, int pid);
+DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__CHECKPOINT_DEVICES, int pid);
+DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__POST_FORKING, void);
+DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_INIT, void);
+DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_DEVICES_LATE, int id);
 
 enum {
 	CR_PLUGIN_STAGE__DUMP,
@@ -146,5 +161,6 @@ typedef int(cr_plugin_handle_device_vma_t)(int fd, const struct stat *stat);
 typedef int(cr_plugin_update_vma_map_t)(const char *path, const uint64_t addr, const uint64_t old_pgoff,
 					uint64_t *new_pgoff, int *plugin_fd);
 typedef int(cr_plugin_resume_devices_late_t)(int pid);
+typedef int(cr_plugin_post_forking_t)(void);
 
 #endif /* __CRIU_PLUGIN_H__ */
diff --git a/criu/include/crtools.h b/criu/include/crtools.h
index b9309654f..b54b9d929 100644
--- a/criu/include/crtools.h
+++ b/criu/include/crtools.h
@@ -26,6 +26,7 @@ extern int cr_pre_dump_tasks(pid_t pid);
 extern int cr_restore_tasks(void);
 extern int convert_to_elf(char *elf_path, int fd_core);
 extern int cr_check(void);
+extern int check_caps(void);
 extern int cr_dedup(void);
 extern int cr_lazy_pages(bool daemon);
 
diff --git a/criu/include/fault-injection.h b/criu/include/fault-injection.h
index f33918de8..e987c18ce 100644
--- a/criu/include/fault-injection.h
+++ b/criu/include/fault-injection.h
@@ -19,19 +19,15 @@ enum faults {
 	FI_HUGE_ANON_SHMEM_ID = 132,
 	FI_CANNOT_MAP_VDSO = 133,
 	FI_CORRUPT_EXTREGS = 134,
+	FI_DONT_USE_PAGEMAP_SCAN = 135,
+	FI_DUMP_CRASH = 136,
+	FI_COMPEL_INTERRUPT_ONLY_MODE = 137,
+	FI_PLUGIN_CUDA_FORCE_ENABLE = 138,
 	FI_MAX,
 };
 
 static inline bool __fault_injected(enum faults f, enum faults fi_strategy)
 {
-	/*
-	 * Temporary workaround for Xen guests. Breakpoints degrade
-	 * performance linearly, so until we find out the reason,
-	 * let's disable them.
-	 */
-	if (f == FI_NO_BREAKPOINTS)
-		return true;
-
 	return fi_strategy == f;
 }
 
diff --git a/criu/include/fs-magic.h b/criu/include/fs-magic.h
index ad34f4891..ffc0455d5 100644
--- a/criu/include/fs-magic.h
+++ b/criu/include/fs-magic.h
@@ -57,4 +57,8 @@
 #define OVERLAYFS_SUPER_MAGIC 0x794c7630
 #endif
 
+#ifndef PID_FS_MAGIC
+#define PID_FS_MAGIC 0x50494446
+#endif
+
 #endif /* __CR_FS_MAGIC_H__ */
diff --git a/criu/include/hugetlb.h b/criu/include/hugetlb.h
index c0e83652b..9aee5bed3 100644
--- a/criu/include/hugetlb.h
+++ b/criu/include/hugetlb.h
@@ -4,6 +4,11 @@
 #include <sys/types.h>
 #include <stddef.h>
 
+#include "vma.h"
+
+#define ANON_HUGEPAGE_PREFIX	 "/anon_hugepage"
+#define ANON_HUGEPAGE_PREFIX_LEN (sizeof(ANON_HUGEPAGE_PREFIX) - 1)
+
 enum hugepage_size {
 	HUGETLB_16KB,
 	HUGETLB_64KB,
@@ -46,6 +51,7 @@ struct htlb_info {
 extern struct htlb_info hugetlb_info[HUGETLB_MAX];
 
 int is_hugetlb_dev(dev_t dev, int *hugetlb_size_flag);
+int can_dump_with_memfd_hugetlb(dev_t dev, int *hugetlb_size_flag, const char *file_path, struct vma_area *vma);
 unsigned long get_size_from_hugetlb_flag(int flag);
 
 #ifndef MFD_HUGETLB
diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h
index 9f369be64..79e1ac111 100644
--- a/criu/include/image-desc.h
+++ b/criu/include/image-desc.h
@@ -113,6 +113,7 @@ enum {
 	CR_FD_PIPES,
 	CR_FD_TTY_FILES,
 	CR_FD_MEMFD_FILE,
+	CR_FD_PIDFD,
 
 	CR_FD_AUTOFS,
 
diff --git a/criu/include/image.h b/criu/include/image.h
index 5cb01bde2..30e32323d 100644
--- a/criu/include/image.h
+++ b/criu/include/image.h
@@ -35,13 +35,15 @@
  *  - stack
  *  	the memory area is used in application stack so we
  *  	should be careful about guard page here
+ *  - shadow stack
+ *      the memory area is used by shadow stack
  *  - vsyscall
  *  	special memory area injected into the task memory
  *  	space by the kernel itself, represent virtual syscall
  *  	implementation and it is specific to every kernel version,
  *  	its contents should not be dumped ever
  *  - vdso,vvar
- *  	the vDSO area, it might reqire additional memory
+ *  	the vDSO area, it might require additional memory
  *  	contents modification especially when tasks are
  *  	migrating between different kernel versions
  *  - heap
@@ -66,6 +68,18 @@
  *  	processing exiting with error; while the rest of bits
  *  	are part of image ABI, this particular one must never
  *  	be used in image.
+ *  - guard
+ *  	stands for a fake VMA (not represented in the kernel
+ *  	by a struct vm_area_struct). Used to keep information
+ *  	about virtual address space ranges covered by
+ *  	MADV_GUARD_INSTALL guards. These ones must be always at
+ *  	the end of the vma_area_list and properly skipped a.e.
+ *  - uprobes
+ *   	stands for a "[uprobes]" vma that's automatically mapped by
+ *   	the kernel when an active uprobe is hit. Contents of this vma
+ *   	are not dumped and neither are its madvise bits restored,
+ *   	because the kernel is in complete control of this vma. This is
+ *   	just used to track the existence of the uprobes vma.
  */
 #define VMA_AREA_NONE	  (0 << 0)
 #define VMA_AREA_REGULAR  (1 << 0)
@@ -84,6 +98,9 @@
 #define VMA_AREA_VVAR	 (1 << 12)
 #define VMA_AREA_AIORING (1 << 13)
 #define VMA_AREA_MEMFD	 (1 << 14)
+#define VMA_AREA_SHSTK	 (1 << 15)
+#define VMA_AREA_GUARD	 (1 << 16)
+#define VMA_AREA_UPROBES	(1 << 17)
 
 #define VMA_EXT_PLUGIN	  (1 << 27)
 #define VMA_CLOSE	  (1 << 28)
@@ -97,6 +114,8 @@
 
 #define CR_PARENT_LINK "parent"
 
+#define OPT_ALLOW_UPROBES "allow-uprobes"
+
 extern bool ns_per_id;
 extern bool img_common_magic;
 
@@ -146,7 +165,7 @@ static inline int img_raw_fd(struct cr_img *img)
 
 extern off_t img_raw_size(struct cr_img *img);
 
-extern int open_image_dir(char *dir, int mode);
+extern int open_image_dir(const char *dir, int mode);
 extern void close_image_dir(void);
 /*
  * Return -1 -- parent symlink points to invalid target
@@ -174,4 +193,8 @@ extern int read_img_str(struct cr_img *, char **pstr, int size);
 
 extern void close_image(struct cr_img *);
 
+extern int add_inventory_plugin(const char *name);
+extern int check_inventory_plugins(void);
+extern bool check_and_remove_inventory_plugin(const char *name, size_t n);
+
 #endif /* __CR_IMAGE_H__ */
diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
index 83d867e75..e4922f401 100644
--- a/criu/include/kerndat.h
+++ b/criu/include/kerndat.h
@@ -7,6 +7,7 @@
 #include "asm/kerndat.h"
 #include "util-vdso.h"
 #include "hugetlb.h"
+#include <compel/ptrace.h>
 
 struct stat;
 
@@ -82,6 +83,16 @@ struct kerndat_s {
 	bool has_openat2;
 	bool has_rseq;
 	bool has_ptrace_get_rseq_conf;
+	struct __ptrace_rseq_configuration libc_rseq_conf;
+	bool has_ipv6_freebind;
+	bool has_membarrier_get_registrations;
+	bool has_pagemap_scan;
+	bool has_shstk;
+	bool has_close_range;
+	bool has_timer_cr_ids;
+	bool has_breakpoints;
+	bool has_madv_guard;
+	bool has_pagemap_scan_guard_pages;
 };
 
 extern struct kerndat_s kdat;
@@ -104,4 +115,6 @@ extern int kerndat_fs_virtualized(unsigned int which, u32 kdev);
 
 extern int kerndat_has_nspid(void);
 
+extern void kerndat_warn_about_madv_guards(void);
+
 #endif /* __CR_KERNDAT_H__ */
diff --git a/criu/include/linux/aio_abi.h b/criu/include/linux/aio_abi.h
new file mode 100644
index 000000000..d9ce78720
--- /dev/null
+++ b/criu/include/linux/aio_abi.h
@@ -0,0 +1,14 @@
+#ifndef __LINUX__AIO_ABI_H
+#define __LINUX__AIO_ABI_H
+
+typedef __kernel_ulong_t aio_context_t;
+
+/* read() from /dev/aio returns these structures. */
+struct io_event {
+	__u64 data; /* the data field from the iocb */
+	__u64 obj;  /* what iocb this event came from */
+	__s64 res;  /* result code for this event */
+	__s64 res2; /* secondary result */
+};
+
+#endif /* __LINUX__AIO_ABI_H */
diff --git a/criu/include/linux/mount.h b/criu/include/linux/mount.h
index 9a3a28b10..fefafa89e 100644
--- a/criu/include/linux/mount.h
+++ b/criu/include/linux/mount.h
@@ -4,32 +4,40 @@
 #include "common/config.h"
 #include "compel/plugins/std/syscall-codes.h"
 
-#ifdef CONFIG_HAS_FSCONFIG
-#include <linux/mount.h>
-#else
+/* Copied from /usr/include/sys/mount.h */
+
+#ifndef FSOPEN_CLOEXEC
+/* The type of fsconfig call made.   */
 enum fsconfig_command {
-	FSCONFIG_SET_FLAG = 0,	      /* Set parameter, supplying no value */
-	FSCONFIG_SET_STRING = 1,      /* Set parameter, supplying a string value */
-	FSCONFIG_SET_BINARY = 2,      /* Set parameter, supplying a binary blob value */
-	FSCONFIG_SET_PATH = 3,	      /* Set parameter, supplying an object by path */
-	FSCONFIG_SET_PATH_EMPTY = 4,  /* Set parameter, supplying an object by (empty) path */
-	FSCONFIG_SET_FD = 5,	      /* Set parameter, supplying an object by fd */
-	FSCONFIG_CMD_CREATE = 6,      /* Invoke superblock creation */
+	FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */
+#define FSCONFIG_SET_FLAG FSCONFIG_SET_FLAG
+	FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */
+#define FSCONFIG_SET_STRING FSCONFIG_SET_STRING
+	FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */
+#define FSCONFIG_SET_BINARY FSCONFIG_SET_BINARY
+	FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */
+#define FSCONFIG_SET_PATH FSCONFIG_SET_PATH
+	FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */
+#define FSCONFIG_SET_PATH_EMPTY FSCONFIG_SET_PATH_EMPTY
+	FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */
+#define FSCONFIG_SET_FD FSCONFIG_SET_FD
+	FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */
+#define FSCONFIG_CMD_CREATE FSCONFIG_CMD_CREATE
 	FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */
+#define FSCONFIG_CMD_RECONFIGURE FSCONFIG_CMD_RECONFIGURE
 };
+
+#endif // FSOPEN_CLOEXEC
+
+/* fsopen flags. The definition is deliberately redundant: it lets us check
+ * that the kernel value, the glibc value and our value still match.
+ */
+#define FSOPEN_CLOEXEC 0x00000001
+
+#ifndef MS_MGC_VAL
+/* Magic mount flag number. Has to be or-ed to the flag values.  */
+#define MS_MGC_VAL 0xc0ed0000 /* Magic flag number to indicate "new" flags */
+#define MS_MGC_MSK 0xffff0000 /* Magic flag number mask */
 #endif
 
-static inline int sys_fsopen(const char *fsname, unsigned int flags)
-{
-	return syscall(__NR_fsopen, fsname, flags);
-}
-static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux)
-{
-	return syscall(__NR_fsconfig, fd, cmd, key, value, aux);
-}
-static inline int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags)
-{
-	return syscall(__NR_fsmount, fd, flags, attr_flags);
-}
-
 #endif
diff --git a/criu/include/linux/rseq.h b/criu/include/linux/rseq.h
index a47876e66..5ceefbf8e 100644
--- a/criu/include/linux/rseq.h
+++ b/criu/include/linux/rseq.h
@@ -9,7 +9,12 @@
 #endif
 #endif
 
-#ifndef __GLIBC_HAVE_KERNEL_RSEQ
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+#include "common/config.h"
+
+#ifdef CONFIG_HAS_NO_LIBC_RSEQ_DEFS
 /*
  * linux/rseq.h
  *
@@ -18,9 +23,6 @@
  * Copyright (c) 2015-2018 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  */
 
-#include <linux/types.h>
-#include <asm/byteorder.h>
-
 enum rseq_cpu_id_state {
 	RSEQ_CPU_ID_UNINITIALIZED = -1,
 	RSEQ_CPU_ID_REGISTRATION_FAILED = -2,
@@ -41,13 +43,20 @@ enum rseq_cs_flags {
 	RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
 	RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
 };
+#endif /* CONFIG_HAS_NO_LIBC_RSEQ_DEFS */
 
+/*
+ * Use our own definition of struct rseq_cs because some distros
+ * (for example Mariner GNU/Linux) declare this structure in their own way,
+ * which leads to mismatches between printf format specifiers and the
+ * struct rseq_cs field types.
+ */
 /*
  * struct rseq_cs is aligned on 4 * 8 bytes to ensure it is always
  * contained within a single cache-line. It is usually declared as
  * link-time constant data.
  */
-struct rseq_cs {
+struct criu_rseq_cs {
 	/* Version of this structure. */
 	__u32 version;
 	/* enum rseq_cs_flags */
@@ -57,7 +66,6 @@ struct rseq_cs {
 	__u64 post_commit_offset;
 	__u64 abort_ip;
 } __attribute__((aligned(4 * sizeof(__u64))));
-#endif /* __GLIBC_HAVE_KERNEL_RSEQ */
 
 /*
  * We have to have our own copy of struct rseq definition because
diff --git a/criu/include/log.h b/criu/include/log.h
index 85e6dc2e7..cbed33007 100644
--- a/criu/include/log.h
+++ b/criu/include/log.h
@@ -60,6 +60,8 @@ void flush_early_log_buffer(int fd);
 
 #define pr_perror(fmt, ...) pr_err(fmt ": %s\n", ##__VA_ARGS__, strerror(errno))
 
+#define pr_pwarn(fmt, ...) pr_warn(fmt ": %s\n", ##__VA_ARGS__, strerror(errno))
+
 #endif /* CR_NOGLIBC */
 
 #endif /* __CR_LOG_H__ */
diff --git a/criu/include/magic.h b/criu/include/magic.h
index 22d7218e4..6f0aff26d 100644
--- a/criu/include/magic.h
+++ b/criu/include/magic.h
@@ -29,7 +29,7 @@
 
 /*
  * The magic-s below correspond to coordinates
- * of various Russian towns in the NNNNEEEE form.
+ * of various towns in the NNNNEEEE form.
  */
 
 #define INVENTORY_MAGIC	     0x58313116 /* Veliky Novgorod */
@@ -100,6 +100,7 @@
 #define BPFMAP_FILE_MAGIC    0x57506142 /* Alapayevsk */
 #define BPFMAP_DATA_MAGIC    0x64324033 /* Arkhangelsk */
 #define APPARMOR_MAGIC	     0x59423047 /* Nikolskoye */
+#define PIDFD_MAGIC	     0x54435556 /* Ufa */
 
 #define IFADDR_MAGIC	RAW_IMAGE_MAGIC
 #define ROUTE_MAGIC	RAW_IMAGE_MAGIC
diff --git a/criu/include/mem.h b/criu/include/mem.h
index 03574ea3d..e9ce3518a 100644
--- a/criu/include/mem.h
+++ b/criu/include/mem.h
@@ -7,6 +7,7 @@
 #include "pid.h"
 #include "proc_parse.h"
 #include "inventory.pb-c.h"
+#include "pagemap-cache.h"
 
 struct parasite_ctl;
 struct vm_area_list;
@@ -30,10 +31,12 @@ extern int do_task_reset_dirty_track(int pid);
 extern unsigned long dump_pages_args_size(struct vm_area_list *vmas);
 extern int parasite_dump_pages_seized(struct pstree_item *item, struct vm_area_list *vma_area_list,
 				      struct mem_dump_ctl *mdc, struct parasite_ctl *ctl);
+extern int collect_madv_guards(pid_t pid, struct vm_area_list *vma_area_list);
 
 #define PME_PRESENT	  (1ULL << 63)
 #define PME_SWAP	  (1ULL << 62)
 #define PME_FILE	  (1ULL << 61)
+#define PME_GUARD_REGION  (1ULL << 58)
 #define PME_SOFT_DIRTY	  (1ULL << 55)
 #define PME_PSHIFT_BITS	  (6)
 #define PME_STATUS_BITS	  (3)
@@ -47,5 +50,12 @@ int open_vmas(struct pstree_item *t);
 int prepare_vmas(struct pstree_item *t, struct task_restore_args *ta);
 int unmap_guard_pages(struct pstree_item *t);
 int prepare_mappings(struct pstree_item *t);
-bool should_dump_page(VmaEntry *vmae, u64 pme);
+
+struct page_info {
+	u64 next;
+	bool softdirty;
+};
+
+int should_dump_page(pmc_t *pmc, VmaEntry *vmae, u64 vaddr, struct page_info *page_info);
+
 #endif /* __CR_MEM_H__ */
diff --git a/criu/include/memfd.h b/criu/include/memfd.h
index 1b1dc79bb..78d810019 100644
--- a/criu/include/memfd.h
+++ b/criu/include/memfd.h
@@ -1,7 +1,9 @@
 #ifndef __CR_MEMFD_H__
 #define __CR_MEMFD_H__
 
+#include <stdbool.h>
 #include <sys/stat.h>
+
 #include "int.h"
 #include "common/config.h"
 
@@ -12,7 +14,7 @@ extern int is_memfd(dev_t dev);
 extern int dump_one_memfd_cond(int lfd, u32 *id, struct fd_parms *parms);
 extern const struct fdtype_ops memfd_dump_ops;
 
-extern int memfd_open(struct file_desc *d, u32 *fdflags);
+extern int memfd_open(struct file_desc *d, u32 *fdflags, bool filemap);
 extern struct collect_image_info memfd_cinfo;
 extern struct file_desc *collect_memfd(u32 id);
 extern int apply_memfd_seals(void);
diff --git a/criu/include/mman.h b/criu/include/mman.h
index 8ca71fadf..43e0b6cc7 100644
--- a/criu/include/mman.h
+++ b/criu/include/mman.h
@@ -4,6 +4,9 @@
 #ifndef MAP_HUGETLB
 #define MAP_HUGETLB 0x40000
 #endif
+#ifndef MAP_DROPPABLE
+#define MAP_DROPPABLE 0x08
+#endif
 #ifndef MADV_HUGEPAGE
 #define MADV_HUGEPAGE 14
 #endif
@@ -13,5 +16,11 @@
 #ifndef MADV_DONTDUMP
 #define MADV_DONTDUMP 16
 #endif
+#ifndef MADV_WIPEONFORK
+#define MADV_WIPEONFORK 18
+#endif
+#ifndef MADV_GUARD_INSTALL
+#define MADV_GUARD_INSTALL 102
+#endif
 
 #endif /* __CR_MMAN_H__ */
diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index e2ea6e17f..183a3b852 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -1,6 +1,8 @@
 #ifndef __CR_NS_H__
 #define __CR_NS_H__
 
+#include <sys/socket.h>
+
 #include "common/compiler.h"
 #include "files.h"
 #include "common/list.h"
@@ -224,4 +226,19 @@ extern int add_ns_shared_cb(int (*actor)(void *data), void *data);
 extern struct ns_id *get_socket_ns(int lfd);
 extern struct ns_id *lookup_ns_by_kid(unsigned int kid, struct ns_desc *nd);
 
+struct unsc_msg {
+	struct msghdr h;
+	/*
+	 * 0th is the call address
+	 * 1st is the flags
+	 * 2nd is the optional (NULL in response) arguments
+	 */
+	struct iovec iov[3];
+	char c[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))];
+};
+
+extern void unsc_msg_init(struct unsc_msg *m, uns_call_t *c, int *x, void *arg, size_t asize, int fd, pid_t *pid);
+extern void unsc_msg_pid_fd(struct unsc_msg *um, pid_t *pid, int *fd);
+extern int start_unix_cred_daemon(pid_t *pid, int (*daemon_func)(int sk));
+
 #endif /* __CR_NS_H__ */
diff --git a/criu/include/net.h b/criu/include/net.h
index 0da4cad13..7c5ede21e 100644
--- a/criu/include/net.h
+++ b/criu/include/net.h
@@ -31,7 +31,7 @@ extern int collect_net_namespaces(bool for_dump);
 
 extern int network_lock(void);
 extern void network_unlock(void);
-extern int network_lock_internal(void);
+extern int network_lock_internal(bool restore);
 
 extern struct ns_desc net_ns_desc;
 
@@ -50,7 +50,6 @@ extern int kerndat_has_newifindex(void);
 extern int kerndat_link_nsid(void);
 extern int net_get_nsid(int rtsk, int fd, int *nsid);
 extern struct ns_id *net_get_root_ns(void);
-extern int kerndat_nsid(void);
 extern void check_has_netns_ioc(int fd, bool *kdat_val, const char *name);
 extern int net_set_ext(struct ns_id *ns);
 extern struct ns_id *get_root_netns(void);
diff --git a/criu/include/page-pipe.h b/criu/include/page-pipe.h
index 15178c015..65292b7ab 100644
--- a/criu/include/page-pipe.h
+++ b/criu/include/page-pipe.h
@@ -92,9 +92,9 @@ struct kernel_pipe_buffer {
 struct page_pipe_buf {
 	int p[2];		/* pipe with pages */
 	unsigned int pipe_size; /* how many pages can be fit into pipe */
-	unsigned int pipe_off;	/* where this buf is started in a pipe */
-	unsigned int pages_in;	/* how many pages are there */
 	unsigned int nr_segs;	/* how many iov-s are busy */
+	unsigned long pipe_off;	/* where this buf is started in a pipe */
+	unsigned long pages_in;	/* how many pages are there */
 #define PPB_LAZY (1 << 0)
 	unsigned int flags;
 	struct iovec *iov;  /* vaddr:len map */
@@ -149,7 +149,7 @@ struct pipe_read_dest {
 };
 
 extern int pipe_read_dest_init(struct pipe_read_dest *prd);
-extern int page_pipe_read(struct page_pipe *pp, struct pipe_read_dest *prd, unsigned long addr, unsigned int *nr_pages,
+extern int page_pipe_read(struct page_pipe *pp, struct pipe_read_dest *prd, unsigned long addr, unsigned long *nr_pages,
 			  unsigned int ppb_flags);
 
 #endif /* __CR_PAGE_PIPE_H__ */
diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h
index 36fe67092..0d9b35019 100644
--- a/criu/include/page-xfer.h
+++ b/criu/include/page-xfer.h
@@ -69,9 +69,9 @@ extern int check_parent_page_xfer(int fd_type, unsigned long id);
  */
 
 /* async request/receive of remote pages */
-extern int request_remote_pages(unsigned long img_id, unsigned long addr, int nr_pages);
+extern int request_remote_pages(unsigned long img_id, unsigned long addr, unsigned long nr_pages);
 
-typedef int (*ps_async_read_complete)(unsigned long img_id, unsigned long vaddr, int nr_pages, void *);
-extern int page_server_start_read(void *buf, int nr_pages, ps_async_read_complete complete, void *priv, unsigned flags);
+typedef int (*ps_async_read_complete)(unsigned long img_id, unsigned long vaddr, unsigned long nr_pages, void *);
+extern int page_server_start_read(void *buf, unsigned long nr_pages, ps_async_read_complete complete, void *priv, unsigned flags);
 
 #endif /* __CR_PAGE_XFER__H__ */
diff --git a/criu/include/pagemap-cache.h b/criu/include/pagemap-cache.h
index 1d8bbffaf..875e69e56 100644
--- a/criu/include/pagemap-cache.h
+++ b/criu/include/pagemap-cache.h
@@ -1,10 +1,12 @@
 #ifndef __CR_PAGEMAP_H__
 #define __CR_PAGEMAP_H__
 
+#include <stdbool.h>
 #include <sys/types.h>
 #include "int.h"
 
 #include "common/list.h"
+#include "pagemap_scan.h"
 
 struct vma_area;
 
@@ -15,9 +17,15 @@ typedef struct {
 	unsigned long start;		  /* start of area */
 	unsigned long end;		  /* end of area */
 	const struct list_head *vma_head; /* list head of VMAs we're serving */
+	int fd;				  /* file to read PMs from */
+
 	u64 *map;			  /* local buffer */
 	size_t map_len;			  /* length of a buffer */
-	int fd;				  /* file to read PMs from */
+
+	struct page_region *regs; /* buffer for the PAGEMAP_SCAN ioctl */
+	size_t regs_len;	  /* actual length of regs */
+	size_t regs_max_len;	  /* maximum length of regs */
+	size_t regs_idx;	  /* current index in the regs array */
 } pmc_t;
 
 #define PMC_INIT \
@@ -26,7 +34,8 @@ typedef struct {
 	}
 
 extern int pmc_init(pmc_t *pmc, pid_t pid, const struct list_head *vma_head, size_t size);
-extern u64 *pmc_get_map(pmc_t *pmc, const struct vma_area *vma);
+extern int pmc_get_map(pmc_t *pmc, const struct vma_area *vma);
 extern void pmc_fini(pmc_t *pmc);
+extern int pmc_fill(pmc_t *pmc, u64 start, u64 end);
 
 #endif /* __CR_PAGEMAP_H__ */
diff --git a/criu/include/pagemap.h b/criu/include/pagemap.h
index 8c7180559..4cbc87cc6 100644
--- a/criu/include/pagemap.h
+++ b/criu/include/pagemap.h
@@ -44,7 +44,7 @@
 
 struct page_read {
 	/* reads page from current pagemap */
-	int (*read_pages)(struct page_read *, unsigned long vaddr, int nr, void *, unsigned flags);
+	int (*read_pages)(struct page_read *, unsigned long vaddr, unsigned long nr, void *, unsigned flags);
 	/* Advance page_read to the next entry */
 	int (*advance)(struct page_read *pr);
 	void (*close)(struct page_read *);
@@ -52,12 +52,15 @@ struct page_read {
 	int (*sync)(struct page_read *pr);
 	int (*seek_pagemap)(struct page_read *pr, unsigned long vaddr);
 	void (*reset)(struct page_read *pr);
-	int (*io_complete)(struct page_read *, unsigned long vaddr, int nr);
-	int (*maybe_read_page)(struct page_read *pr, unsigned long vaddr, int nr, void *buf, unsigned flags);
+	int (*io_complete)(struct page_read *, unsigned long vaddr, unsigned long nr);
+	int (*maybe_read_page)(struct page_read *pr, unsigned long vaddr, unsigned long nr, void *buf, unsigned flags);
 
 	/* Whether or not pages can be read in PIE code */
 	bool pieok;
 
+	/* Whether or not to disable image deduplication */
+	bool disable_dedup;
+
 	/* Private data of reader */
 	struct cr_img *pmi;
 	struct cr_img *pi;
@@ -112,6 +115,8 @@ int pagemap_render_iovec(struct list_head *from, struct task_restore_args *ta);
  */
 extern void dup_page_read(struct page_read *src, struct page_read *dst);
 
+extern void page_read_disable_dedup(struct page_read *pr);
+
 extern int dedup_one_iovec(struct page_read *pr, unsigned long base, unsigned long len);
 
 static inline unsigned long pagemap_len(PagemapEntry *pe)
diff --git a/criu/include/pagemap_scan.h b/criu/include/pagemap_scan.h
new file mode 100644
index 000000000..9046e01ed
--- /dev/null
+++ b/criu/include/pagemap_scan.h
@@ -0,0 +1,69 @@
+#ifndef __CR_PAGEMAP_SCAN_H__
+#define __CR_PAGEMAP_SCAN_H__
+
+#ifndef PAGEMAP_SCAN
+#include <sys/types.h>
+#include "int.h"
+
+/* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */
+#define PAGE_IS_WPALLOWED  (1 << 0)
+#define PAGE_IS_WRITTEN	   (1 << 1)
+#define PAGE_IS_FILE	   (1 << 2)
+#define PAGE_IS_PRESENT	   (1 << 3)
+#define PAGE_IS_SWAPPED	   (1 << 4)
+#define PAGE_IS_PFNZERO	   (1 << 5)
+#define PAGE_IS_HUGE	   (1 << 6)
+#define PAGE_IS_SOFT_DIRTY (1 << 7)
+#define PAGE_IS_GUARD	   (1 << 8)
+
+/*
+ * struct page_region - Page region with flags
+ * @start:      Start of the region
+ * @end:        End of the region (exclusive)
+ * @categories: PAGE_IS_* category bitmask for the region
+ */
+struct page_region {
+	u64 start;
+	u64 end;
+	u64 categories;
+};
+
+#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg)
+
+/* Flags for PAGEMAP_SCAN ioctl */
+#define PM_SCAN_WP_MATCHING   (1 << 0) /* Write protect the pages matched. */
+#define PM_SCAN_CHECK_WPASYNC (1 << 1) /* Abort the scan when a non-WP-enabled page is found. */
+
+/*
+ * struct pm_scan_arg - Pagemap ioctl argument
+ * @size:               Size of the structure
+ * @flags:              Flags for the IOCTL
+ * @start:              Starting address of the region
+ * @end:                Ending address of the region
+ * @walk_end:           Address where the scan stopped (written by kernel).
+ *                      walk_end == end (address tags cleared) informs that the scan completed on entire range.
+ * @vec:                Address of page_region struct array for output
+ * @vec_len:            Length of the page_region struct array
+ * @max_pages:          Optional limit for number of returned pages (0 = disabled)
+ * @category_inverted:  PAGE_IS_* categories which values match if 0 instead of 1
+ * @category_mask:      Skip pages for which any category doesn't match
+ * @category_anyof_mask: Skip pages for which no category matches
+ * @return_mask:        PAGE_IS_* categories that are to be reported in `page_region`s returned
+ */
+struct pm_scan_arg {
+	u64 size;
+	u64 flags;
+	u64 start;
+	u64 end;
+	u64 walk_end;
+	u64 vec;
+	u64 vec_len;
+	u64 max_pages;
+	u64 category_inverted;
+	u64 category_mask;
+	u64 category_anyof_mask;
+	u64 return_mask;
+};
+#endif /* PAGEMAP_SCAN */
+
+#endif /* __CR_PAGEMAP_SCAN_H__ */
diff --git a/criu/include/parasite-syscall.h b/criu/include/parasite-syscall.h
index 4540e11ee..4a8ec2fee 100644
--- a/criu/include/parasite-syscall.h
+++ b/criu/include/parasite-syscall.h
@@ -21,13 +21,6 @@ struct rt_sigframe;
 struct parasite_ctl;
 struct parasite_thread_ctl;
 
-extern int parasite_dump_sigacts_seized(struct parasite_ctl *ctl, struct pstree_item *);
-extern int parasite_dump_itimers_seized(struct parasite_ctl *ctl, struct pstree_item *);
-
-struct proc_posix_timers_stat;
-extern int parasite_dump_posix_timers_seized(struct proc_posix_timers_stat *proc_args, struct parasite_ctl *ctl,
-					     struct pstree_item *);
-
 extern int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_misc *misc);
 extern int parasite_dump_creds(struct parasite_ctl *ctl, CredsEntry *ce);
 extern int parasite_dump_thread_leader_seized(struct parasite_ctl *ctl, int pid, CoreEntry *core);
diff --git a/criu/include/parasite.h b/criu/include/parasite.h
index d2a06889f..176357711 100644
--- a/criu/include/parasite.h
+++ b/criu/include/parasite.h
@@ -63,7 +63,7 @@ struct parasite_dump_pages_args {
 	unsigned int add_prot;
 	unsigned int off;
 	unsigned int nr_segs;
-	unsigned int nr_pages;
+	unsigned long nr_pages;
 };
 
 static inline struct parasite_vma_entry *pargs_vmas(struct parasite_dump_pages_args *a)
@@ -118,6 +118,8 @@ static inline int posix_timers_dump_size(int timer_n)
  */
 
 struct parasite_dump_misc {
+	bool has_membarrier_get_registrations; /* this is sent from criu to parasite. */
+
 	unsigned long brk;
 
 	u32 pid;
@@ -128,6 +130,7 @@ struct parasite_dump_misc {
 	int dumpable;
 	int thp_disabled;
 	int child_subreaper;
+	int membarrier_registration_mask;
 };
 
 /*
@@ -145,9 +148,11 @@ struct parasite_dump_creds {
 	u32 cap_prm[CR_CAP_SIZE];
 	u32 cap_eff[CR_CAP_SIZE];
 	u32 cap_bnd[CR_CAP_SIZE];
+	u32 cap_amb[CR_CAP_SIZE];
 
 	int uids[4];
 	int gids[4];
+	int no_new_privs;
 	unsigned int secbits;
 	unsigned int ngroups;
 	/*
@@ -241,7 +246,12 @@ struct parasite_dump_cgroup_args {
 	 *
 	 * The string is null terminated.
 	 */
-	char contents[1 << 12];
+	char contents[(1 << 12) - 32];
+	/*
+	 * Contains the path to thread cgroup procfs.
+	 * "self/task/<tid>/cgroup"
+	 */
+	char thread_cgrp[32];
 };
 
 #endif /* !__ASSEMBLY__ */
diff --git a/criu/include/pid.h b/criu/include/pid.h
index 49cb2d322..b2b7a361a 100644
--- a/criu/include/pid.h
+++ b/criu/include/pid.h
@@ -31,6 +31,10 @@ struct pid {
 	pid_t real;
 
 	int state; /* TASK_XXX constants */
+	/* If an item is in the stopped state, this holds the number of the
+	 * signal that caused the task to stop.
+	 */
+	int stop_signo;
 
 	/*
 	 * The @virt pid is one which used in the image itself and keeps
diff --git a/criu/include/pidfd.h b/criu/include/pidfd.h
new file mode 100644
index 000000000..bcc0fb45a
--- /dev/null
+++ b/criu/include/pidfd.h
@@ -0,0 +1,16 @@
+#ifndef __CR_PIDFD_H__
+#define __CR_PIDFD_H__
+
+#include "files.h"
+#include "pidfd.pb-c.h"
+
+extern const struct fdtype_ops pidfd_dump_ops;
+extern struct collect_image_info pidfd_cinfo;
+extern int is_pidfd_link(char *link);
+extern void init_dead_pidfd_hash(void);
+struct pidfd_dump_info {
+	PidfdEntry pidfe;
+	pid_t pid;
+};
+
+#endif /* __CR_PIDFD_H__ */
diff --git a/criu/include/prctl.h b/criu/include/prctl.h
index c843f40a7..2966659da 100644
--- a/criu/include/prctl.h
+++ b/criu/include/prctl.h
@@ -30,6 +30,21 @@
 #ifndef PR_SET_DUMPABLE
 #define PR_SET_DUMPABLE 4
 #endif
+#ifndef PR_GET_NO_NEW_PRIVS
+#define PR_GET_NO_NEW_PRIVS 39
+#endif
+#ifndef PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#endif
+#ifndef PR_CAP_AMBIENT
+#define PR_CAP_AMBIENT 47
+#endif
+#ifndef PR_CAP_AMBIENT_IS_SET
+#define PR_CAP_AMBIENT_IS_SET 1
+#endif
+#ifndef PR_CAP_AMBIENT_RAISE
+#define PR_CAP_AMBIENT_RAISE 2
+#endif
 
 #ifndef PR_SET_MM
 #define PR_SET_MM	      35
@@ -82,4 +97,11 @@ struct prctl_mm_map {
 #define PR_GET_THP_DISABLE 42
 #endif
 
+#ifndef PR_TIMER_CREATE_RESTORE_IDS
+#define PR_TIMER_CREATE_RESTORE_IDS             77
+# define PR_TIMER_CREATE_RESTORE_IDS_OFF        0
+# define PR_TIMER_CREATE_RESTORE_IDS_ON         1
+# define PR_TIMER_CREATE_RESTORE_IDS_GET        2
+#endif
+
 #endif /* __CR_PRCTL_H__ */
diff --git a/criu/include/proc_parse.h b/criu/include/proc_parse.h
index 0c334a190..76d3242d2 100644
--- a/criu/include/proc_parse.h
+++ b/criu/include/proc_parse.h
@@ -81,6 +81,7 @@ struct proc_status_creds {
 	u32 cap_prm[PROC_CAP_SIZE];
 	u32 cap_eff[PROC_CAP_SIZE];
 	u32 cap_bnd[PROC_CAP_SIZE];
+	u32 cap_amb[PROC_CAP_SIZE];
 };
 
 #define INVALID_UID ((uid_t)-1)
@@ -104,4 +105,6 @@ extern int parse_uptime(uint64_t *upt);
 
 extern int parse_timens_offsets(struct timespec *boff, struct timespec *moff);
 
+extern bool found_uprobes_vma(void);
+
 #endif /* __CR_PROC_PARSE_H__ */
diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h
index 3824de101..c4241be55 100644
--- a/criu/include/protobuf-desc.h
+++ b/criu/include/protobuf-desc.h
@@ -70,6 +70,7 @@ enum {
 	PB_BPFMAP_FILE,
 	PB_BPFMAP_DATA,
 	PB_APPARMOR,
+	PB_PIDFD,
 
 	/* PB_AUTOGEN_STOP */
 
diff --git a/criu/include/pstree.h b/criu/include/pstree.h
index 8ae750e1a..b750a919e 100644
--- a/criu/include/pstree.h
+++ b/criu/include/pstree.h
@@ -63,7 +63,7 @@ struct dmp_info {
 	struct parasite_ctl *parasite_ctl;
 	struct parasite_thread_ctl **thread_ctls;
 	uint64_t *thread_sp;
-	struct rseq_cs *thread_rseq_cs;
+	struct criu_rseq_cs *thread_rseq_cs;
 
 	/*
 	 * Although we don't support dumping different struct creds in general,
@@ -104,6 +104,7 @@ extern void pstree_insert_pid(struct pid *pid_node);
 extern struct pid *pstree_pid_by_virt(pid_t pid);
 
 extern struct pstree_item *root_item;
+extern bool has_children(struct pstree_item *item);
 extern struct pstree_item *pstree_item_next(struct pstree_item *item);
 #define for_each_pstree_item(pi) for (pi = root_item; pi != NULL; pi = pstree_item_next(pi))
 
diff --git a/criu/include/rbtree.h b/criu/include/rbtree.h
index ba0a8100e..6981aa8f9 100644
--- a/criu/include/rbtree.h
+++ b/criu/include/rbtree.h
@@ -14,7 +14,7 @@
 #define RB_MASK	 3
 
 struct rb_node {
-	unsigned long rb_parent_color; /* Keeps both parent anc color */
+	unsigned long rb_parent_color; /* Keeps both parent and color */
 	struct rb_node *rb_right;
 	struct rb_node *rb_left;
 } __aligned(sizeof(long));
diff --git a/criu/include/restore.h b/criu/include/restore.h
index 8ef0dbddf..189051826 100644
--- a/criu/include/restore.h
+++ b/criu/include/restore.h
@@ -7,4 +7,57 @@
 
 extern int arch_set_thread_regs_nosigrt(struct pid *pid);
 
+struct task_restore_args;
+struct pstree_item;
+struct rst_shstk_info;
+
+#ifndef arch_shstk_prepare
+static inline int arch_shstk_prepare(struct pstree_item *item,
+				     CoreEntry *core,
+				     struct task_restore_args *ta)
+{
+	return 0;
+}
+#define arch_shstk_prepare arch_shstk_prepare
+#endif
+
+#ifndef arch_shstk_unlock
+static inline int arch_shstk_unlock(struct pstree_item *item,
+				    CoreEntry *core, pid_t pid)
+{
+	return 0;
+}
+#define arch_shstk_unlock arch_shstk_unlock
+#endif
+
+#ifndef arch_shstk_trampoline
+static inline int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
+				    int (*func)(void *arg), void *arg)
+{
+	return func(arg);
+}
+#define arch_shstk_trampoline arch_shstk_trampoline
+#endif
+
+#ifndef shstk_restorer_stack_size
+static always_inline long shstk_restorer_stack_size(void)
+{
+	return 0;
+}
+#endif
+
+#ifndef shstk_set_restorer_stack
+static always_inline long shstk_set_restorer_stack(struct rst_shstk_info *info, void *ptr)
+{
+	return 0;
+}
+#endif
+
+#ifndef shstk_min_mmap_addr
+static always_inline long shstk_min_mmap_addr(struct rst_shstk_info *info, unsigned long def)
+{
+	return def;
+}
+#endif
+
 #endif
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
index 325804e44..14c0a3768 100644
--- a/criu/include/restorer.h
+++ b/criu/include/restorer.h
@@ -56,6 +56,10 @@ struct restore_posix_timer {
 	int overrun;
 };
 
+#ifndef rst_shstk_info
+struct rst_shstk_info {};
+#endif
+
 /*
  * We should be able to construct fpu sigframe in sigreturn_prep_fpu_frame,
  * so the mem_zone.rt_sigframe should be 64-bytes aligned. To make things
@@ -71,8 +75,8 @@ struct thread_creds_args {
 	u32 cap_prm[CR_CAP_SIZE];
 	u32 cap_eff[CR_CAP_SIZE];
 	u32 cap_bnd[CR_CAP_SIZE];
+	u32 cap_amb[CR_CAP_SIZE];
 
-	unsigned int secbits;
 	char *lsm_profile;
 	unsigned int *groups;
 	char *lsm_sockcreate;
@@ -120,7 +124,11 @@ struct thread_restore_args {
 	unsigned int seccomp_filters_n;
 	bool seccomp_force_tsync;
 
+	struct rst_shstk_info shstk;
+
 	char comm[TASK_COMM_LEN];
+	int cg_set;
+	int cgroupd_sk;
 } __aligned(64);
 
 typedef long (*thread_restore_fcall_t)(struct thread_restore_args *args);
@@ -142,7 +150,7 @@ struct task_restore_args {
 	struct timeval logstart;
 
 	int uffd;
-	bool has_thp_enabled;
+	bool thp_disabled;
 
 	/* threads restoration */
 	int nr_threads;				 /* number of threads */
@@ -162,6 +170,7 @@ struct task_restore_args {
 
 	struct restore_posix_timer *posix_timers;
 	unsigned int posix_timers_n;
+	bool posix_timer_cr_ids;
 
 	struct restore_timerfd *timerfd;
 	unsigned int timerfd_n;
@@ -228,6 +237,7 @@ struct task_restore_args {
 #endif
 	int lsm_type;
 	int child_subreaper;
+	int membarrier_registration_mask;
 	bool has_clone3_set_tid;
 
 	/*
@@ -235,6 +245,11 @@ struct task_restore_args {
 	 * unregister it before memory restoration procedure
 	 */
 	struct rst_rseq_param libc_rseq;
+
+	uid_t uid;
+	u32 cap_eff[CR_CAP_SIZE];
+
+	struct rst_shstk_info shstk;
 } __aligned(64);
 
 /*
@@ -326,4 +341,27 @@ enum {
 #define __r_sym(name)		  restorer_sym##name
 #define restorer_sym(rblob, name) (void *)(rblob + __r_sym(name))
 
+#ifndef arch_shstk_switch_to_restorer
+static inline int arch_shstk_switch_to_restorer(struct rst_shstk_info *shstk)
+{
+	return 0;
+}
+#define arch_shstk_switch_to_restorer arch_shstk_switch_to_restorer
+#endif
+
+#ifndef arch_shstk_restore
+static inline int arch_shstk_restore(struct rst_shstk_info *shstk)
+{
+	return 0;
+}
+#define arch_shstk_restore arch_shstk_restore
+#endif
+
+#ifndef shstk_vma_restore
+static always_inline int shstk_vma_restore(VmaEntry *vma_entry)
+{
+	return -1;
+}
+#endif
+
 #endif /* __CR_RESTORER_H__ */
diff --git a/criu/include/rst_info.h b/criu/include/rst_info.h
index d0a3db6c5..deb297e5f 100644
--- a/criu/include/rst_info.h
+++ b/criu/include/rst_info.h
@@ -1,6 +1,7 @@
 #ifndef __CR_RST_INFO_H__
 #define __CR_RST_INFO_H__
 
+#include "asm/restore.h"
 #include "common/lock.h"
 #include "common/list.h"
 #include "vma.h"
@@ -14,6 +15,7 @@ struct task_entries {
 	futex_t start;
 	atomic_t cr_err;
 	mutex_t userns_sync_lock;
+	mutex_t cgroupd_sync_lock;
 	mutex_t last_pid_mutex;
 };
 
@@ -22,7 +24,7 @@ struct fdt {
 	pid_t pid; /* Who should restore this fd table */
 	/*
 	 * The fd table is ready for restoing, if fdt_lock is equal to nr
-	 * The fdt table was restrored, if fdt_lock is equal to nr + 1
+	 * The fdt table was restored, if fdt_lock is equal to nr + 1
 	 */
 	futex_t fdt_lock;
 };
@@ -32,6 +34,11 @@ struct rst_rseq {
 	uint64_t rseq_cs_pointer;
 };
 
+#ifndef ARCH_RST_INFO
+struct rst_arch_info {
+};
+#endif
+
 struct rst_info {
 	struct list_head fds;
 
@@ -73,11 +80,14 @@ struct rst_info {
 	 */
 	bool has_old_seccomp_filter;
 
-	bool has_thp_enabled;
-
 	struct rst_rseq *rseqe;
 
+	futex_t shstk_enable;
+	futex_t shstk_unlock;
+
 	void *breakpoint;
+
+	struct rst_arch_info arch_info;
 };
 
 extern struct task_entries *task_entries;
diff --git a/criu/include/seize.h b/criu/include/seize.h
index cf7366cb0..fc7facad3 100644
--- a/criu/include/seize.h
+++ b/criu/include/seize.h
@@ -2,8 +2,14 @@
 #define __CR_SEIZE_H__
 
 extern int collect_pstree(void);
+extern int checkpoint_devices(void);
+struct pstree_item;
 extern void pstree_switch_state(struct pstree_item *root_item, int st);
 extern const char *get_real_freezer_state(void);
 extern bool alarm_timeouted(void);
 
+extern char *task_comm_info(pid_t pid, char *comm, size_t size);
+extern char *__task_comm_info(pid_t pid);
+extern void set_compel_interrupt_only_mode(void);
+
 #endif
diff --git a/criu/include/servicefd.h b/criu/include/servicefd.h
index c6979de7f..4265d94ed 100644
--- a/criu/include/servicefd.h
+++ b/criu/include/servicefd.h
@@ -24,6 +24,7 @@ enum sfd_type {
 				 */
 	ROOT_FD_OFF,	/* Root of the namespace we dump/restore */
 	CGROUP_YARD,
+	CGROUPD_SK,	  /* Socket for cgroupd to fix up thread's cgroup controller */
 	USERNSD_SK,	  /* Socket for usernsd */
 	NS_FD_OFF,	  /* Node's net namespace fd */
 	TRANSPORT_FD_OFF, /* to transfer file descriptors */
diff --git a/criu/include/setproctitle.h b/criu/include/setproctitle.h
index bc634331b..a4873578a 100644
--- a/criu/include/setproctitle.h
+++ b/criu/include/setproctitle.h
@@ -1,19 +1,7 @@
 #ifndef __CR_SETPROCTITLE_H__
 #define __CR_SETPROCTITLE_H__
 
-#ifdef CONFIG_HAS_LIBBSD
-#include <bsd/unistd.h>
-#else
-
-/*
- * setproctitle_init is in the libbsd since v0.6.0. This macro allows to
- * compile criu with libbsd<0.6.0.
- */
-#ifndef CONFIG_HAS_SETPROCTITLE_INIT
-#define setproctitle_init(argc, argv, envp)
-#endif
-
-#define setproctitle(fmt, ...)
-#endif
+extern void __setproctitle_init(int argc, char *argv[], char *envp[]);
+extern void __setproctitle(const char *fmt, ...);
 
 #endif /* __CR_SETPROCTITLE_H__ */
diff --git a/criu/include/shmem.h b/criu/include/shmem.h
index 813ef630e..15cab1146 100644
--- a/criu/include/shmem.h
+++ b/criu/include/shmem.h
@@ -4,13 +4,14 @@
 #include "int.h"
 #include "common/lock.h"
 #include "images/vma.pb-c.h"
+#include "pagemap-cache.h"
 
 struct vma_area;
 
 extern int collect_shmem(int pid, struct vma_area *vma);
 extern int collect_sysv_shmem(unsigned long shmid, unsigned long size);
 extern int cr_dump_shmem(void);
-extern int add_shmem_area(pid_t pid, VmaEntry *vma, u64 *map);
+extern int add_shmem_area(pid_t pid, VmaEntry *vma, pmc_t *pmc);
 extern int fixup_sysv_shmems(void);
 extern int dump_one_memfd_shmem(int fd, unsigned long shmid, unsigned long size);
 extern int dump_one_sysv_shmem(void *addr, unsigned long size, unsigned long shmid);
diff --git a/criu/include/sigact.h b/criu/include/sigact.h
new file mode 100644
index 000000000..4df011f96
--- /dev/null
+++ b/criu/include/sigact.h
@@ -0,0 +1,14 @@
+#ifndef __CR_SIGACT_H__
+#define __CR_SIGACT_H__
+
+#include "images/core.pb-c.h"
+
+extern rt_sigaction_t sigchld_act;
+
+struct parasite_ctl;
+struct pstree_item;
+
+extern int prepare_sigactions(CoreEntry *core);
+extern int parasite_dump_sigacts_seized(struct parasite_ctl *ctl, struct pstree_item *);
+
+#endif
diff --git a/criu/include/sk-inet.h b/criu/include/sk-inet.h
index 5dd2a6551..69ee8589e 100644
--- a/criu/include/sk-inet.h
+++ b/criu/include/sk-inet.h
@@ -69,6 +69,7 @@ extern int inet_connect(int sk, struct inet_sk_info *);
 
 #ifdef CR_NOGLIBC
 #define setsockopt sys_setsockopt
+#define pr_perror(fmt, ...) pr_err(fmt ": errno %d\n", ##__VA_ARGS__, -ret)
 #endif
 static inline void tcp_repair_off(int fd)
 {
@@ -76,7 +77,7 @@ static inline void tcp_repair_off(int fd)
 
 	ret = setsockopt(fd, SOL_TCP, TCP_REPAIR, &aux, sizeof(aux));
 	if (ret < 0)
-		pr_err("Failed to turn off repair mode on socket: %m\n");
+		pr_perror("Failed to turn off repair mode on socket %d", fd);
 }
 
 extern void tcp_locked_conn_add(struct inet_sk_info *);
@@ -86,6 +87,9 @@ extern void cpt_unlock_tcp_connections(void);
 extern int dump_one_tcp(int sk, struct inet_sk_desc *sd, SkOptsEntry *soe);
 extern int restore_one_tcp(int sk, struct inet_sk_info *si);
 
+extern int dump_tcp_opts(int sk, TcpOptsEntry *toe);
+extern int restore_tcp_opts(int sk, TcpOptsEntry *toe);
+
 #define SK_EST_PARAM	  "tcp-established"
 #define SK_INFLIGHT_PARAM "skip-in-flight"
 #define SK_CLOSE_PARAM	  "tcp-close"
diff --git a/criu/include/sockets.h b/criu/include/sockets.h
index 399d38664..6c81d3edd 100644
--- a/criu/include/sockets.h
+++ b/criu/include/sockets.h
@@ -25,8 +25,9 @@ struct socket_desc {
 };
 
 extern int dump_socket(struct fd_parms *p, int lfd, FdinfoEntry *);
-extern int dump_socket_opts(int sk, SkOptsEntry *soe);
+extern int dump_socket_opts(int sk, int family, SkOptsEntry *soe);
 extern int restore_socket_opts(int sk, SkOptsEntry *soe);
+extern int sk_setbufs(int sk, uint32_t *bufs);
 extern void release_skopts(SkOptsEntry *);
 extern int restore_prepare_socket(int sk);
 extern void preload_socket_modules(void);
diff --git a/criu/include/string.h b/criu/include/string.h
index e11a42058..4c71d961c 100644
--- a/criu/include/string.h
+++ b/criu/include/string.h
@@ -3,18 +3,9 @@
 
 #include <sys/types.h>
 
-#ifdef CONFIG_HAS_LIBBSD
-#include <bsd/string.h>
-#endif
-
 #include "common/config.h"
 
-#ifndef CONFIG_HAS_STRLCPY
-extern size_t strlcpy(char *dest, const char *src, size_t size);
-#endif
-
-#ifndef CONFIG_HAS_STRLCAT
-extern size_t strlcat(char *dest, const char *src, size_t count);
-#endif
+extern size_t __strlcpy(char *dest, const char *src, size_t size);
+extern size_t __strlcat(char *dest, const char *src, size_t count);
 
 #endif /* __CR_STRING_H__ */
diff --git a/criu/include/sysctl.h b/criu/include/sysctl.h
index ac7924dcd..2d689a9a0 100644
--- a/criu/include/sysctl.h
+++ b/criu/include/sysctl.h
@@ -34,8 +34,9 @@ enum {
 /*
  * Some entries might be missing mark them as optional.
  */
-#define CTL_FLAGS_OPTIONAL	1
-#define CTL_FLAGS_HAS		2
-#define CTL_FLAGS_READ_EIO_SKIP 4
+#define CTL_FLAGS_OPTIONAL	  1
+#define CTL_FLAGS_HAS		  2
+#define CTL_FLAGS_READ_EIO_SKIP	  4
+#define CTL_FLAGS_IPC_EACCES_SKIP 8
 
 #endif /* __CR_SYSCTL_H__ */
diff --git a/criu/include/timer.h b/criu/include/timer.h
new file mode 100644
index 000000000..d1deb6051
--- /dev/null
+++ b/criu/include/timer.h
@@ -0,0 +1,17 @@
+#ifndef __CR_TIMER_H__
+#define __CR_TIMER_H__
+
+#include "images/core.pb-c.h"
+
+struct task_restore_args;
+struct pstree_item;
+struct parasite_ctl;
+struct proc_posix_timers_stat;
+
+extern int prepare_itimers(int pid, struct task_restore_args *args, CoreEntry *core);
+extern int prepare_posix_timers(int pid, struct task_restore_args *ta, CoreEntry *core);
+
+extern int parasite_dump_itimers_seized(struct parasite_ctl *ctl, struct pstree_item *item);
+extern int parasite_dump_posix_timers_seized(struct proc_posix_timers_stat *proc_args, struct parasite_ctl *ctl,
+					     struct pstree_item *item);
+#endif
diff --git a/criu/include/util-caps.h b/criu/include/util-caps.h
new file mode 100644
index 000000000..7ccd162f5
--- /dev/null
+++ b/criu/include/util-caps.h
@@ -0,0 +1,58 @@
+#ifndef __CR_UTIL_CAPS_H__
+#define __CR_UTIL_CAPS_H__
+
+#include <sys/capability.h>
+
+#ifndef CAP_CHECKPOINT_RESTORE
+#define CAP_CHECKPOINT_RESTORE 40
+#endif
+
+static inline bool has_capability(int cap, u32 *cap_eff)
+{
+	int mask = CAP_TO_MASK(cap);
+	int index = CAP_TO_INDEX(cap);
+	u32 effective;
+
+	effective = cap_eff[index];
+
+	if (!(mask & effective)) {
+		pr_debug("Effective capability %d missing\n", cap);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * True when @cap_eff holds CAP_CHECKPOINT_RESTORE or CAP_SYS_ADMIN.
+ *
+ * Everything guarded by CAP_CHECKPOINT_RESTORE is also guarded by
+ * CAP_SYS_ADMIN, so either capability is accepted. CAP_SYS_ADMIN is
+ * only looked at when CAP_CHECKPOINT_RESTORE is missing.
+ */
+static inline bool has_cap_checkpoint_restore(u32 *cap_eff)
+{
+	return has_capability(CAP_CHECKPOINT_RESTORE, cap_eff) || has_capability(CAP_SYS_ADMIN, cap_eff);
+}
+
+static inline bool has_cap_net_admin(u32 *cap_eff)
+{
+	return has_capability(CAP_NET_ADMIN, cap_eff);
+}
+
+static inline bool has_cap_sys_chroot(u32 *cap_eff)
+{
+	return has_capability(CAP_SYS_CHROOT, cap_eff);
+}
+
+static inline bool has_cap_setuid(u32 *cap_eff)
+{
+	return has_capability(CAP_SETUID, cap_eff);
+}
+
+static inline bool has_cap_sys_resource(u32 *cap_eff)
+{
+	return has_capability(CAP_SYS_RESOURCE, cap_eff);
+}
+
+#endif /* __CR_UTIL_CAPS_H__ */
diff --git a/criu/include/util-vdso.h b/criu/include/util-vdso.h
index c4386cf8e..9fd9a6de4 100644
--- a/criu/include/util-vdso.h
+++ b/criu/include/util-vdso.h
@@ -30,6 +30,7 @@ struct vdso_symbol {
 struct vdso_symtable {
 	unsigned long vdso_size;
 	unsigned long vvar_size;
+	unsigned long vvar_vclock_size;
 	struct vdso_symbol symbols[VDSO_SYMBOL_MAX];
 	bool vdso_before_vvar; /* order of vdso/vvar pair */
 };
diff --git a/criu/include/util.h b/criu/include/util.h
index 4e29c079e..55ad5b63c 100644
--- a/criu/include/util.h
+++ b/criu/include/util.h
@@ -21,6 +21,8 @@
 #include "log.h"
 #include "common/err.h"
 
+#include "compel/infect-util.h"
+
 #define PREF_SHIFT_OP(pref, op, size) ((size)op(pref##BYTES_SHIFT))
 #define KBYTES_SHIFT		      10
 #define MBYTES_SHIFT		      20
@@ -170,6 +172,7 @@ extern pid_t fork_and_ptrace_attach(int (*child_setup)(void));
 extern int cr_daemon(int nochdir, int noclose, int close_fd);
 extern int status_ready(void);
 extern int is_root_user(void);
+extern int close_fds(int minfd);
 
 extern int set_proc_self_fd(int fd);
 
@@ -263,6 +266,10 @@ bool is_path_prefix(const char *path, const char *prefix);
 FILE *fopenat(int dirfd, char *path, char *cflags);
 void split(char *str, char token, char ***out, int *n);
 
+int cr_fchown(int fd, uid_t new_uid, gid_t new_gid);
+int cr_fchperm(int fd, uid_t new_uid, gid_t new_gid, mode_t new_mode);
+int cr_fchpermat(int dirfd, const char *path, uid_t new_uid, gid_t new_gid, mode_t new_mode, int flags);
+
 int fd_has_data(int lfd);
 
 int make_yard(char *path);
@@ -274,8 +281,6 @@ static inline int sk_wait_data(int sk)
 }
 
 void fd_set_nonblocking(int fd, bool on);
-void tcp_nodelay(int sk, bool on);
-void tcp_cork(int sk, bool on);
 
 const char *ns_to_string(unsigned int ns);
 
@@ -384,7 +389,14 @@ static inline void print_stack_trace(pid_t pid)
 
 extern int mount_detached_fs(const char *fsname);
 
-extern char *get_legacy_iptables_bin(bool ipv6);
+extern int cr_fsopen(const char *fsname, unsigned int flags);
+extern int cr_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux);
+extern int cr_fsmount(int fd, unsigned int flags, unsigned int attr_flags);
+extern void fsfd_dump_messages(int fd);
+
+extern char *get_legacy_iptables_bin(bool ipv6, bool restore);
+
+extern int set_opts_cap_eff(void);
 
 extern ssize_t read_all(int fd, void *buf, size_t size);
 extern ssize_t write_all(int fd, const void *buf, size_t size);
@@ -396,15 +408,27 @@ static inline void cleanup_freep(void *p)
 	free(*pp);
 }
 
+#define cleanup_file __attribute__((cleanup(cleanup_filep)))
+static inline void cleanup_filep(FILE **f)
+{
+	FILE *file = *f;
+	if (file)
+		(void)fclose(file);
+}
+
 extern int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args);
 
 /*
  * criu_run_id is a unique value of the current run. It can be used to
  * generate resource ID-s to avoid conflicts with other CRIU processes.
  */
-extern uint64_t criu_run_id;
+extern char criu_run_id[RUN_ID_HASH_LENGTH];
 extern void util_init(void);
+#define NO_DUMP_CRIU_RUN_ID 0x7f
+extern char dump_criu_run_id[RUN_ID_HASH_LENGTH];
 
 extern char *resolve_mountpoint(char *path);
 
+extern int cr_close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
+
 #endif /* __CR_UTIL_H__ */
diff --git a/criu/include/vma.h b/criu/include/vma.h
index 106c56af2..b8ddfc142 100644
--- a/criu/include/vma.h
+++ b/criu/include/vma.h
@@ -106,6 +106,7 @@ static inline bool vma_entry_is_private(VmaEntry *entry, unsigned long task_size
 	return (vma_entry_is(entry, VMA_AREA_REGULAR) &&
 		(vma_entry_is(entry, VMA_ANON_PRIVATE) || vma_entry_is(entry, VMA_FILE_PRIVATE)) &&
 		(entry->end <= task_size)) ||
+	       vma_entry_is(entry, VMA_AREA_SHSTK) ||
 	       vma_entry_is(entry, VMA_AREA_AIORING);
 }
 
@@ -122,8 +123,8 @@ static inline struct vma_area *vma_next(struct vma_area *vma)
 static inline bool vma_entry_can_be_lazy(VmaEntry *e)
 {
 	return ((e->flags & MAP_ANONYMOUS) && (e->flags & MAP_PRIVATE) && !(e->flags & MAP_LOCKED) &&
-		!(vma_entry_is(e, VMA_AREA_VDSO)) && !(vma_entry_is(e, VMA_AREA_VSYSCALL)) &&
-		!(e->flags & MAP_HUGETLB));
+		!(vma_entry_is(e, VMA_AREA_VDSO)) && !(vma_entry_is(e, VMA_AREA_VVAR)) &&
+		!(vma_entry_is(e, VMA_AREA_VSYSCALL)) && !(e->flags & MAP_HUGETLB));
 }
 
 #endif /* __CR_VMA_H__ */
diff --git a/criu/ipc_ns.c b/criu/ipc_ns.c
index 4fe082fbb..7e95be8c5 100644
--- a/criu/ipc_ns.c
+++ b/criu/ipc_ns.c
@@ -292,6 +292,8 @@ static void pr_info_ipc_shm(const IpcShmEntry *shm)
 
 static int ipc_sysctl_req(IpcVarEntry *e, int op)
 {
+	int i;
+
 	struct sysctl_req req[] = {
 		{ "kernel/sem", e->sem_ctls, CTL_U32A(e->n_sem_ctls) },
 		{ "kernel/msgmax", &e->msg_ctlmax, CTL_U32 },
@@ -332,6 +334,9 @@ static int ipc_sysctl_req(IpcVarEntry *e, int op)
 	if (e->has_shm_next_id)
 		req[nr++] = req[16];
 
+	for (i = 0; i < nr; i++)
+		req[i].flags = CTL_FLAGS_IPC_EACCES_SKIP;
+
 	return sysctl_op(req, nr, op, CLONE_NEWIPC);
 }
 
@@ -570,7 +575,7 @@ static int prepare_ipc_sem_desc(struct cr_img *img, const IpcSemEntry *sem)
 {
 	int ret, id;
 	struct sysctl_req req[] = {
-		{ "kernel/sem_next_id", &sem->desc->id, CTL_U32 },
+		{ "kernel/sem_next_id", &sem->desc->id, CTL_U32, CTL_FLAGS_IPC_EACCES_SKIP },
 	};
 	struct semid_ds semid;
 
@@ -703,7 +708,7 @@ static int prepare_ipc_msg_queue(struct cr_img *img, const IpcMsgEntry *msq)
 {
 	int ret, id;
 	struct sysctl_req req[] = {
-		{ "kernel/msg_next_id", &msq->desc->id, CTL_U32 },
+		{ "kernel/msg_next_id", &msq->desc->id, CTL_U32, CTL_FLAGS_IPC_EACCES_SKIP },
 	};
 	struct msqid_ds msqid;
 
@@ -841,7 +846,7 @@ static int prepare_ipc_shm_seg(struct cr_img *img, const IpcShmEntry *shm)
 {
 	int ret, id, hugetlb_flag = 0;
 	struct sysctl_req req[] = {
-		{ "kernel/shm_next_id", &shm->desc->id, CTL_U32 },
+		{ "kernel/shm_next_id", &shm->desc->id, CTL_U32, CTL_FLAGS_IPC_EACCES_SKIP },
 	};
 	struct shmid_ds shmid;
 
diff --git a/criu/irmap.c b/criu/irmap.c
index 7b9d77bc1..d2c5d588a 100644
--- a/criu/irmap.c
+++ b/criu/irmap.c
@@ -67,6 +67,7 @@ static struct irmap hints[] = {
 		.path = "/var/log",
 		.nr_kids = -1,
 	},
+	{ .path = "/usr/share/dbus-1/services", .nr_kids = -1 },
 	{ .path = "/usr/share/dbus-1/system-services", .nr_kids = -1 },
 	{ .path = "/var/lib/polkit-1/localauthority", .nr_kids = -1 },
 	{ .path = "/usr/share/polkit-1/actions", .nr_kids = -1 },
@@ -101,7 +102,7 @@ static int irmap_update_stat(struct irmap *i)
 
 	pr_debug("Refresh stat for %s\n", i->path);
 	if (fstatat(mntns_root, i->path + 1, &st, AT_SYMLINK_NOFOLLOW)) {
-		pr_perror("Can't stat %s", i->path);
+		pr_pwarn("Can't stat %s", i->path);
 		return -1;
 	}
 
@@ -136,7 +137,7 @@ static int irmap_update_dir(struct irmap *t)
 	pr_debug("Refilling %s dir\n", t->path);
 	fd = openat(mntns_root, t->path + 1, O_RDONLY);
 	if (fd < 0) {
-		pr_perror("Can't open %s", t->path);
+		pr_pwarn("Can't open %s", t->path);
 		return -1;
 	}
 
@@ -499,8 +500,13 @@ int irmap_scan_path_add(char *path)
 		return -1;
 	}
 
-	o->ir->path = path;
+	o->ir->path = xstrdup(path);
+	if (!o->ir->path) {
+		xfree(o->ir);
+		xfree(o);
+		return -1;
+	}
 	o->ir->nr_kids = -1;
-	list_add(&o->node, &opts.irmap_scan_paths);
+	list_add_tail(&o->node, &opts.irmap_scan_paths);
 	return 0;
 }
diff --git a/criu/kerndat.c b/criu/kerndat.c
index b8b6bc95d..2dc2f77d5 100644
--- a/criu/kerndat.c
+++ b/criu/kerndat.c
@@ -12,15 +12,17 @@
 #include <sys/sysmacros.h>
 #include <stdint.h>
 #include <sys/socket.h>
-#include <arpa/inet.h> /* for sockaddr_in and inet_ntoa() */
+#include <netinet/in.h>
 #include <sys/prctl.h>
 #include <sys/inotify.h>
 #include <sched.h>
 #include <sys/mount.h>
+#include <linux/membarrier.h>
 
 #if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1)
 #include <nftables/libnftables.h>
 #endif
+#include <sys/utsname.h>
 
 #include "common/config.h"
 #include "int.h"
@@ -29,6 +31,7 @@
 #include "kerndat.h"
 #include "fs-magic.h"
 #include "mem.h"
+#include "mman.h"
 #include "common/compiler.h"
 #include "sysctl.h"
 #include "cr_options.h"
@@ -51,13 +54,24 @@
 #include "sched.h"
 #include "memfd.h"
 #include "mount-v2.h"
+#include "util-caps.h"
+#include "pagemap_scan.h"
 
 struct kerndat_s kdat = {};
+volatile int dummy_var;
 
 static int check_pagemap(void)
 {
-	int ret, fd;
+	int ret, fd, retry;
 	u64 pfn = 0;
+	struct pm_scan_arg args = {
+		.size = sizeof(struct pm_scan_arg),
+		.flags = 0,
+		.category_inverted = PAGE_IS_PFNZERO | PAGE_IS_FILE,
+		.category_mask = PAGE_IS_PFNZERO | PAGE_IS_FILE,
+		.category_anyof_mask = PAGE_IS_PRESENT | PAGE_IS_SWAPPED,
+		.return_mask = PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_SOFT_DIRTY,
+	};
 
 	fd = __open_proc(PROC_SELF, EPERM, O_RDONLY, "pagemap");
 	if (fd < 0) {
@@ -70,11 +84,44 @@ static int check_pagemap(void)
 		return -1;
 	}
 
-	/* Get the PFN of some present page. Stack is here, so try it :) */
-	ret = pread(fd, &pfn, sizeof(pfn), (((unsigned long)&ret) / page_size()) * sizeof(pfn));
-	if (ret != sizeof(pfn)) {
-		pr_perror("Can't read pagemap");
-		return -1;
+	if (ioctl(fd, PAGEMAP_SCAN, &args) == 0) {
+		pr_debug("PAGEMAP_SCAN is supported\n");
+		kdat.has_pagemap_scan = true;
+
+		args.return_mask |= PAGE_IS_GUARD;
+		if (ioctl(fd, PAGEMAP_SCAN, &args) == 0)
+			kdat.has_pagemap_scan_guard_pages = true;
+	} else if (errno == EINVAL || errno == ENOTTY) {
+		/* Not a failure: PAGEMAP_SCAN is simply reported as unsupported. */
+		pr_debug("PAGEMAP_SCAN isn't supported\n");
+	} else {
+		/* Report first: close() may clobber errno. Then release the
+		 * pagemap fd, as the pread() error path below does, so this
+		 * early return doesn't leak it. */
+		pr_perror("PAGEMAP_SCAN failed with unexpected errno");
+		close(fd);
+		return -1;
+	}
+
+	retry = 3;
+	while (retry--) {
+		++dummy_var;
+		/* Get the PFN of a page likely to be present. */
+		ret = pread(fd, &pfn, sizeof(pfn), PAGE_PFN((uintptr_t)&dummy_var) * sizeof(pfn));
+		if (ret != sizeof(pfn)) {
+			pr_perror("Can't read pagemap");
+			close(fd);
+			return -1;
+		}
+		/* The page can be swapped out by the time the read occurs,
+		 * in which case the rest of the bits are a swap type + offset
+		 * (which could be zero even if not hidden).
+		 * Retry if this happens. */
+		if (pfn & PME_PRESENT)
+			break;
+		pr_warn("got non-present PFN %#lx for the dummy data page; %s\n", (unsigned long)pfn,
+			retry ? "retrying" : "giving up");
+		pfn = 0;
 	}
 
 	close(fd);
@@ -420,10 +467,6 @@ static int kerndat_get_dirty_track(void)
 	} else {
 	no_dt:
 		pr_info("Dirty tracking support is OFF\n");
-		if (opts.track_mem) {
-			pr_err("Tracking memory is not available\n");
-			return -1;
-		}
 	}
 
 	return 0;
@@ -467,8 +510,15 @@ static int get_last_cap(void)
 	struct sysctl_req req[] = {
 		{ "kernel/cap_last_cap", &kdat.last_cap, CTL_U32 },
 	};
+	int ret;
 
-	return sysctl_op(req, ARRAY_SIZE(req), CTL_READ, 0);
+	ret = sysctl_op(req, ARRAY_SIZE(req), CTL_READ, 0);
+	if (ret || kdat.last_cap < 32 * CR_CAP_SIZE)
+		return ret;
+
+	pr_err("Kernel reports more capabilities than this CRIU supports: %u > %u\n",
+	       kdat.last_cap, 32 * CR_CAP_SIZE - 1);
+	return -1;
 }
 
 static bool kerndat_has_memfd_create(void)
@@ -502,7 +552,7 @@ static bool kerndat_has_memfd_hugetlb(void)
 	if (ret >= 0) {
 		kdat.has_memfd_hugetlb = true;
 		close(ret);
-	} else if (ret == -1 && (errno == EINVAL || errno == ENOENT)) {
+	} else if (ret == -1 && (errno == EINVAL || errno == ENOENT || errno == ENOSYS)) {
 		kdat.has_memfd_hugetlb = false;
 	} else {
 		pr_perror("Unexpected error from memfd_create(\"\", MFD_HUGETLB)");
@@ -601,7 +651,7 @@ static int kerndat_loginuid(void)
 static int kerndat_iptables_has_xtlocks(void)
 {
 	int fd;
-	char *argv[4] = { "sh", "-c", "iptables -w -L", NULL };
+	char *argv[4] = { "sh", "-c", "iptables -n -w -L", NULL };
 
 	fd = open("/dev/null", O_RDWR);
 	if (fd < 0) {
@@ -617,29 +667,52 @@ static int kerndat_iptables_has_xtlocks(void)
 	return 0;
 }
 
-int kerndat_tcp_repair(void)
-{
-	int sock, clnt = -1, yes = 1, exit_code = -1;
-	struct sockaddr_in addr;
-	socklen_t aux;
+/*
+ * Unfortunately in C htonl() is not constexpr and cannot be used in a static
+ * initialization below.
+ */
+#define constant_htonl(x) \
+	(__BYTE_ORDER == __BIG_ENDIAN ? (x) : \
+		(((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >>  8) | \
+		(((x) & 0x0000ff00) <<  8) | (((x) & 0x000000ff) << 24))
 
-	memset(&addr, 0, sizeof(addr));
-	addr.sin_family = AF_INET;
-	inet_pton(AF_INET, "127.0.0.1", &(addr.sin_addr));
-	addr.sin_port = 0;
+static int kerndat_tcp_repair(void)
+{
+	static const struct sockaddr_in loopback_ip4 = {
+		.sin_family = AF_INET,
+		.sin_port = 0,
+		.sin_addr = { constant_htonl(INADDR_LOOPBACK) },
+	};
+	static const struct sockaddr_in6 loopback_ip6 = {
+		.sin6_family = AF_INET6,
+		.sin6_port = 0,
+		.sin6_addr = IN6ADDR_LOOPBACK_INIT,
+	};
+	int sock, clnt = -1, yes = 1, exit_code = -1;
+	const struct sockaddr *addr;
+	struct sockaddr_storage listener_addr;
+	socklen_t addrlen;
+
+	addr = (const struct sockaddr *)&loopback_ip4;
+	addrlen = sizeof(loopback_ip4);
 	sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+	if (sock < 0 && errno == EAFNOSUPPORT) {
+		addr = (const struct sockaddr *)&loopback_ip6;
+		addrlen = sizeof(loopback_ip6);
+		sock = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP);
+	}
 	if (sock < 0) {
 		pr_perror("Unable to create a socket");
 		return -1;
 	}
 
-	if (bind(sock, (struct sockaddr *)&addr, sizeof(addr))) {
+	if (bind(sock, addr, addrlen)) {
 		pr_perror("Unable to bind a socket");
 		goto err;
 	}
 
-	aux = sizeof(addr);
-	if (getsockname(sock, (struct sockaddr *)&addr, &aux)) {
+	addrlen = sizeof(listener_addr);
+	if (getsockname(sock, (struct sockaddr *)&listener_addr, &addrlen)) {
 		pr_perror("Unable to get a socket name");
 		goto err;
 	}
@@ -649,13 +722,13 @@ int kerndat_tcp_repair(void)
 		goto err;
 	}
 
-	clnt = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+	clnt = socket(addr->sa_family, SOCK_STREAM, IPPROTO_TCP);
 	if (clnt < 0) {
 		pr_perror("Unable to create a socket");
 		goto err;
 	}
 
-	if (connect(clnt, (struct sockaddr *)&addr, sizeof(addr))) {
+	if (connect(clnt, (const struct sockaddr *)&listener_addr, addrlen)) {
 		pr_perror("Unable to connect a socket");
 		goto err;
 	}
@@ -682,20 +755,22 @@ err:
 	return exit_code;
 }
 
-int kerndat_nsid(void)
+static int kerndat_nsid(void)
 {
 	int nsid, sk;
 
+	kdat.has_nsid = false;
+
 	sk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 	if (sk < 0) {
-		pr_perror("Unable to create a netlink socket");
-		return -1;
+		pr_pwarn("Unable to create a netlink socket: NSID can't be used.");
+		return 0;
 	}
 
 	if (net_get_nsid(sk, getpid(), &nsid) < 0) {
-		pr_err("NSID is not supported\n");
+		pr_warn("NSID is not supported\n");
 		close(sk);
-		return -1;
+		return 0;
 	}
 
 	kdat.has_nsid = true;
@@ -764,7 +839,7 @@ static int kerndat_detect_stack_guard_gap(void)
 		 * (see kernel commit 1be7107fbe18ee).
 		 *
 		 * Same time there was semi-complete
-		 * patch released which hitted a number
+		 * patch released which hit a number
 		 * of repos (Ubuntu, Fedora) where instead
 		 * of PAGE_SIZE the 1M gap is cut off.
 		 */
@@ -927,6 +1002,7 @@ static int kerndat_has_ptrace_get_rseq_conf(void)
 	pid_t pid;
 	int len;
 	struct __ptrace_rseq_configuration rseq;
+	int ret = 0;
 
 	pid = fork_and_ptrace_attach(NULL);
 	if (pid < 0)
@@ -934,6 +1010,9 @@ static int kerndat_has_ptrace_get_rseq_conf(void)
 
 	len = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, pid, sizeof(rseq), &rseq);
 	if (len != sizeof(rseq)) {
+		if (kdat.has_ptrace_get_rseq_conf)
+			ret = 1; /* we should update kdat */
+
 		kdat.has_ptrace_get_rseq_conf = false;
 		pr_info("ptrace(PTRACE_GET_RSEQ_CONFIGURATION) is not supported\n");
 		goto out;
@@ -944,16 +1023,27 @@ static int kerndat_has_ptrace_get_rseq_conf(void)
 	 * we need to pay attention to that and, possibly, make changes on the CRIU side.
 	 */
 	if (rseq.flags != 0) {
+		if (kdat.has_ptrace_get_rseq_conf)
+			ret = 1; /* we should update kdat */
+
 		kdat.has_ptrace_get_rseq_conf = false;
 		pr_err("ptrace(PTRACE_GET_RSEQ_CONFIGURATION): rseq.flags != 0\n");
 	} else {
+		if (!kdat.has_ptrace_get_rseq_conf)
+			ret = 1; /* we should update kdat */
+
 		kdat.has_ptrace_get_rseq_conf = true;
+
+		if (memcmp(&kdat.libc_rseq_conf, &rseq, sizeof(rseq)))
+			ret = 1; /* we should update kdat */
+
+		kdat.libc_rseq_conf = rseq;
 	}
 
 out:
 	kill(pid, SIGKILL);
 	waitpid(pid, NULL, 0);
-	return 0;
+	return ret;
 }
 
 int kerndat_sockopt_buf_lock(void)
@@ -964,6 +1054,8 @@ int kerndat_sockopt_buf_lock(void)
 	int sock;
 
 	sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+	if (sock < 0 && errno == EAFNOSUPPORT)
+		sock = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP);
 	if (sock < 0) {
 		pr_perror("Unable to create a socket");
 		return -1;
@@ -1042,9 +1134,9 @@ static int kerndat_has_move_mount_set_group(void)
 	exit_code = 0;
 out:
 	if (umount2(tmpdir, MNT_DETACH))
-		pr_warn("Fail to umount2 %s: %m\n", tmpdir);
+		pr_warn("Fail to umount2 %s: %s\n", tmpdir, strerror(errno));
 	if (rmdir(tmpdir))
-		pr_warn("Fail to rmdir %s: %m\n", tmpdir);
+		pr_warn("Fail to rmdir %s: %s\n", tmpdir, strerror(errno));
 	return exit_code;
 }
 
@@ -1064,19 +1156,84 @@ static int kerndat_has_openat2(void)
 	return 0;
 }
 
-#define KERNDAT_CACHE_FILE     KDAT_RUNDIR "/criu.kdat"
-#define KERNDAT_CACHE_FILE_TMP KDAT_RUNDIR "/.criu.kdat"
+int __attribute__((weak)) kdat_has_shstk(void)
+{
+	return 0;
+}
 
+static int kerndat_has_shstk(void)
+{
+	int ret = kdat_has_shstk();
+
+	if (ret < 0) {
+		pr_err("kdat_has_shstk failed\n");
+		return ret;
+	}
+
+	kdat.has_shstk = !!ret;
+	return 0;
+}
+
+#define KERNDAT_CACHE_NAME "criu.kdat"
+#define KERNDAT_CACHE_FILE KDAT_RUNDIR "/" KERNDAT_CACHE_NAME
+
+/*
+ * Returns:
+ * -1 if kdat_file was not written due to error
+ * 0 if kdat_file was written
+ * 1 if kdat_file was not written because cache directory undefined in env (non-root mode)
+ */
+static int get_kerndat_filename(char **kdat_file)
+{
+	int ret;
+
+	/*
+	 * Running as non-root, even with CAP_CHECKPOINT_RESTORE, does not
+	 * allow to write to KDAT_RUNDIR which usually is only writable by root.
+	 * Let's write criu.kdat file to XDG_RUNTIME_DIR for non-root cases.
+	 * Note that XDG_RUNTIME_DIR is not always defined (e.g. when executing
+	 * via su/sudo).
+	 */
+	if (opts.unprivileged) {
+		const char *cache_dir = getenv("XDG_RUNTIME_DIR");
+		if (!cache_dir) {
+			pr_warn("$XDG_RUNTIME_DIR not set. Cannot find location for kerndat file\n");
+			return 1;
+		}
+		ret = asprintf(kdat_file, "%s/%s", cache_dir, KERNDAT_CACHE_NAME);
+	} else {
+		ret = asprintf(kdat_file, "%s", KERNDAT_CACHE_FILE);
+	}
+
+	if (unlikely(ret < 0)) {
+		pr_warn("Cannot allocate memory for kerndat file name\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Returns:
+ * -1 if error
+ * 0 if cache was loaded
+ * 1 if cache does not exist or is stale or cache directory undefined in env (non-root mode)
+ */
 static int kerndat_try_load_cache(void)
 {
+	cleanup_free char *kdat_file = NULL;
 	int fd, ret;
 
-	fd = open(KERNDAT_CACHE_FILE, O_RDONLY);
+	ret = get_kerndat_filename(&kdat_file);
+	if (ret)
+		return ret;
+
+	fd = open(kdat_file, O_RDONLY);
 	if (fd < 0) {
 		if (ENOENT == errno)
-			pr_debug("File %s does not exist\n", KERNDAT_CACHE_FILE);
+			pr_debug("File %s does not exist\n", kdat_file);
 		else
-			pr_warn("Can't load %s\n", KERNDAT_CACHE_FILE);
+			pr_warn("Can't load %s\n", kdat_file);
 		return 1;
 	}
 
@@ -1090,12 +1247,12 @@ static int kerndat_try_load_cache(void)
 	close(fd);
 
 	if (ret != sizeof(kdat) || kdat.magic1 != KDAT_MAGIC || kdat.magic2 != KDAT_MAGIC_2) {
-		pr_warn("Stale %s file\n", KERNDAT_CACHE_FILE);
-		unlink(KERNDAT_CACHE_FILE);
+		pr_warn("Stale %s file\n", kdat_file);
+		unlink(kdat_file);
 		return 1;
 	}
 
-	pr_info("Loaded kdat cache from %s\n", KERNDAT_CACHE_FILE);
+	pr_info("Loaded kdat cache from %s\n", kdat_file);
 	return 0;
 }
 
@@ -1103,8 +1260,20 @@ static void kerndat_save_cache(void)
 {
 	int fd, ret;
 	struct statfs s;
+	cleanup_free char *kdat_file = NULL;
+	cleanup_free char *kdat_file_tmp = NULL;
 
-	fd = open(KERNDAT_CACHE_FILE_TMP, O_CREAT | O_EXCL | O_WRONLY, 0600);
+	if (get_kerndat_filename(&kdat_file))
+		return;
+
+	ret = asprintf(&kdat_file_tmp, "%s.tmp", kdat_file);
+
+	if (unlikely(ret < 0)) {
+		pr_warn("Cannot allocate memory for kerndat file name\n");
+		return;
+	}
+
+	fd = open(kdat_file_tmp, O_CREAT | O_EXCL | O_WRONLY, 0600);
 	if (fd < 0)
 		/*
 		 * It can happen that we race with some other criu
@@ -1113,6 +1282,10 @@ static void kerndat_save_cache(void)
 		 */
 		return;
 
+	/*
+	 * If running as root we store the cache file on a tmpfs (/run),
+	 * because the file should be gone after reboot.
+	 */
 	if (fstatfs(fd, &s) < 0 || s.f_type != TMPFS_MAGIC) {
 		pr_warn("Can't keep kdat cache on non-tempfs\n");
 		close(fd);
@@ -1126,20 +1299,21 @@ static void kerndat_save_cache(void)
 	 */
 	kdat.magic1 = KDAT_MAGIC;
 	kdat.magic2 = KDAT_MAGIC_2;
+
 	ret = write(fd, &kdat, sizeof(kdat));
 	close(fd);
 
 	if (ret == sizeof(kdat))
-		ret = rename(KERNDAT_CACHE_FILE_TMP, KERNDAT_CACHE_FILE);
+		ret = rename(kdat_file_tmp, kdat_file);
 	else {
 		ret = -1;
 		errno = EIO;
 	}
 
 	if (ret < 0) {
-		pr_perror("Couldn't save %s", KERNDAT_CACHE_FILE);
+		pr_perror("Couldn't save %s", kdat_file);
 	unl:
-		unlink(KERNDAT_CACHE_FILE_TMP);
+		unlink(kdat_file_tmp);
 	}
 }
 
@@ -1147,6 +1321,14 @@ static int kerndat_uffd(void)
 {
 	int uffd, err = 0;
 
+	if (opts.unprivileged)
+		/*
+		 * If running as non-root uffd_open() fails with
+		 * 'Operation not permitted'. Just ignore uffd for
+		 * non-root for now.
+		 */
+		return 0;
+
 	kdat.uffd_features = 0;
 	uffd = uffd_open(0, &kdat.uffd_features, &err);
 
@@ -1239,6 +1421,8 @@ int kerndat_has_thp_disable(void)
 
 			parse_vmflags(str, &flags, &madv, &io_pf);
 			kdat.has_thp_disable = !(madv & (1 << MADV_NOHUGEPAGE));
+			if (!kdat.has_thp_disable)
+				pr_warn("prctl PR_SET_THP_DISABLE sets MADV_NOHUGEPAGE\n");
 			break;
 		}
 	}
@@ -1282,17 +1466,20 @@ static bool kerndat_has_clone3_set_tid(void)
 	 */
 	pid = syscall(__NR_clone3, &args, sizeof(args));
 
-	if (pid == -1 && (errno == ENOSYS || errno == E2BIG)) {
-		kdat.has_clone3_set_tid = false;
-		return 0;
-	}
-	if (pid == -1 && errno == EINVAL) {
-		kdat.has_clone3_set_tid = true;
-	} else {
-		pr_perror("Unexpected error from clone3");
+	if (pid != -1) {
+		pr_err("Unexpected success: clone3() returned %d\n", pid);
 		return -1;
 	}
 
+	if (errno == ENOSYS || errno == E2BIG)
+		return 0;
+
+	if (errno != EINVAL) {
+		pr_pwarn("Unexpected error from clone3");
+		return 0;
+	}
+
+	kdat.has_clone3_set_tid = true;
 	return 0;
 }
 
@@ -1420,7 +1607,9 @@ static int __has_nftables_concat(void *arg)
 		return 1;
 
 	if (NFT_RUN_CMD(nft, "create table inet CRIU")) {
-		pr_err("Can't create nftables table\n");
+		pr_warn("Can't create nftables table\n");
+		*has = false; /* kdat.has_nftables_concat = false */
+		ret = 0;
 		goto nft_ctx_free_out;
 	}
 
@@ -1456,6 +1645,214 @@ static int kerndat_has_nftables_concat(void)
 #endif
 }
 
+#ifndef IPV6_FREEBIND
+#define IPV6_FREEBIND 78
+#endif
+
+static int __kerndat_has_ipv6_freebind(int sk)
+{
+	int val = 1;
+
+	if (setsockopt(sk, SOL_IPV6, IPV6_FREEBIND, &val, sizeof(int)) == -1) {
+		if (errno == ENOPROTOOPT) {
+			kdat.has_ipv6_freebind = false;
+			return 0;
+		}
+		pr_perror("Unable to setsockopt ipv6_freebind");
+		return -1;
+	}
+
+	kdat.has_ipv6_freebind = true;
+	return 0;
+}
+
+static int kerndat_has_ipv6_freebind(void)
+{
+	int sk, ret;
+
+	if (!kdat.ipv6) {
+		kdat.has_ipv6_freebind = false;
+		return 0;
+	}
+
+	sk = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP);
+	if (sk == -1) {
+		pr_perror("Unable to create a ipv6 dgram socket");
+		return -1;
+	}
+
+	ret = __kerndat_has_ipv6_freebind(sk);
+	close(sk);
+	return ret;
+}
+
+#define MEMBARRIER_CMDBIT_GET_REGISTRATIONS 9
+
+static int kerndat_has_membarrier_get_registrations(void)
+{
+	/* EINVAL: command unknown to this kernel; ENOSYS: no membarrier() at all. */
+	if (syscall(__NR_membarrier, 1 << MEMBARRIER_CMDBIT_GET_REGISTRATIONS, 0) < 0) {
+		if (errno != EINVAL && errno != ENOSYS) {
+			pr_perror("membarrier returned unexpected error code");
+			return -1;
+		}
+		kdat.has_membarrier_get_registrations = false;
+	} else {
+		kdat.has_membarrier_get_registrations = true;
+	}
+
+	return 0;
+}
+
+static int kerndat_has_close_range(void)
+{
+	/* fd is greater than max_fd, so close_range should return EINVAL. */
+	if (cr_close_range(2, 1, 0) == 0) {
+		pr_err("close_range succeeded unexpectedly\n");
+		return -1;
+	}
+
+	if (errno == ENOSYS) {
+		pr_debug("close_range isn't supported\n");
+		return 0;
+	}
+	if (errno != EINVAL) {
+		pr_perror("close_range returned unexpected error code");
+		return -1;
+	}
+
+	kdat.has_close_range = true;
+	return 0;
+}
+
+static int kerndat_has_timer_cr_ids(void)
+{
+	if (prctl(PR_TIMER_CREATE_RESTORE_IDS,
+		  PR_TIMER_CREATE_RESTORE_IDS_GET, 0, 0, 0) == -1) {
+		if (errno == EINVAL) {
+			pr_debug("PR_TIMER_CREATE_RESTORE_IDS isn't supported\n");
+			return 0;
+		}
+		pr_perror("prctl returned unexpected error code");
+		return -1;
+	}
+
+	kdat.has_timer_cr_ids = true;
+	return 0;
+}
+
+static void breakpoint_func(void)
+{
+	if (raise(SIGSTOP))
+		pr_perror("Unable to kill itself with SIGSTOP");
+	exit(1);
+}
+
+/*
+ * kerndat_breakpoints checks that hardware breakpoints work as they should.
+ * In some cases, they might not work in virtual machines if the hypervisor
+ * doesn't virtualize them. For example, they don't work in AMD SEV virtual
+ * machines if the Debug Virtualization extension isn't supported or isn't
+ * enabled in SEV_FEATURES.
+ */
+static int kerndat_breakpoints(void)
+{
+	int status, ret, exit_code = -1;
+	pid_t pid;
+
+	pid = fork();
+	if (pid == -1) {
+		pr_perror("fork");
+		return -1;
+	}
+	if (pid == 0) {
+		if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
+			pr_perror("ptrace(PTRACE_TRACEME)");
+			exit(1);
+		}
+		raise(SIGSTOP); /* stop so the parent can set the breakpoint */
+		breakpoint_func();
+		exit(1);
+	}
+	if (waitpid(pid, &status, 0) == -1) {
+		pr_perror("waitpid for initial stop");
+		goto err;
+	}
+	if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP) {
+		pr_err("Child didn't stop as expected: status=%x\n", status);
+		goto err;
+	}
+	ret = ptrace_set_breakpoint(pid, &breakpoint_func);
+	if (ret < 0) {
+		pr_err("Failed to set breakpoint\n");
+		goto err;
+	}
+	if (ret == 0) {
+		pr_debug("Hardware breakpoints appear to be disabled\n");
+		goto out;
+	}
+	if (waitpid(pid, &status, 0) == -1) {
+		pr_perror("waitpid for breakpoint trigger");
+		goto err;
+	}
+	if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGTRAP) {
+		pr_warn("Hardware breakpoints don't seem to work (status=%x)\n", status);
+		goto out;
+	}
+	kdat.has_breakpoints = true;
+out:
+	exit_code = 0;
+err:
+	if (kill(pid, SIGKILL)) {
+		pr_perror("Failed to kill the child process");
+		exit_code = -1;
+	}
+	if (waitpid(pid, &status, 0) == -1) {
+		pr_perror("Failed to wait for the child process");
+		exit_code = -1;
+	} else if (!WIFSIGNALED(status) || WTERMSIG(status) != SIGKILL) {
+		/* Only look at status when waitpid() actually filled it in. */
+		pr_err("The child exited with unexpected code: %x\n", status);
+		exit_code = -1;
+	}
+	return exit_code;
+}
+
+/*
+ * Probe for madvise(MADV_GUARD_INSTALL) on a scratch anonymous page.
+ * EINVAL leaves kdat.has_madv_guard unset; any other error is fatal.
+ */
+static int kerndat_has_madv_guard(void)
+{
+	int exit_code = 0;
+	void *map;
+
+	/* Portable anonymous mappings pass fd == -1, see mmap(2). */
+	map = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (map == MAP_FAILED) {
+		pr_perror("Can't mmap a page for has_madv_guard feature test");
+		return -1;
+	}
+
+	if (!madvise(map, PAGE_SIZE, MADV_GUARD_INSTALL)) {
+		kdat.has_madv_guard = true;
+	} else if (errno != EINVAL) {
+		pr_perror("madvise failed (has_madv_guard check)");
+		exit_code = -1;
+	}
+
+	munmap(map, PAGE_SIZE);
+	return exit_code;
+}
+
+void kerndat_warn_about_madv_guards(void)
+{
+	if (kdat.has_madv_guard && !kdat.has_pagemap_scan_guard_pages)
+		pr_warn("ioctl(PAGEMAP_SCAN) doesn't support PAGE_IS_GUARD flag. "
+			"CRIU dump will fail if dumped processes use madvise(MADV_GUARD_INSTALL). "
+			"Please, consider updating your kernel.\n");
+}
+
 /*
  * Some features depend on resource that can be dynamically changed
  * at the OS runtime. There are cases that we cannot determine the
@@ -1476,12 +1873,63 @@ int kerndat_try_load_new(void)
 	if (ret < 0)
 		return ret;
 
+	ret = kerndat_has_ptrace_get_rseq_conf();
+	if (ret < 0) {
+		pr_err("kerndat_has_ptrace_get_rseq_conf failed when initializing kerndat.\n");
+		return ret;
+	}
+
+	ret = kerndat_has_shstk();
+	if (ret < 0) {
+		pr_err("kerndat_has_shstk failed when initializing kerndat.\n");
+		return ret;
+	}
+
 	/* New information is found, we need to save to the cache */
 	if (ret)
 		kerndat_save_cache();
 	return 0;
 }
 
+/* Kerndat probes that are skipped in unprivileged mode; returns 0 on success, -1 on failure. */
+static int root_only_init(void)
+{
+	if (opts.unprivileged)
+		return 0;
+
+	/* Probes run in order; the first failure aborts the rest. */
+	if (kerndat_loginuid()) {
+		pr_err("kerndat_loginuid failed when initializing kerndat.\n");
+		return -1;
+	}
+	if (kerndat_tun_netns()) {
+		pr_err("kerndat_tun_netns failed when initializing kerndat.\n");
+		return -1;
+	}
+	if (kerndat_socket_unix_file()) {
+		pr_err("kerndat_socket_unix_file failed when initializing kerndat.\n");
+		return -1;
+	}
+	if (kerndat_link_nsid()) {
+		pr_err("kerndat_link_nsid failed when initializing kerndat.\n");
+		return -1;
+	}
+	if (kerndat_socket_netns()) {
+		pr_err("kerndat_socket_netns failed when initializing kerndat.\n");
+		return -1;
+	}
+	if (kerndat_has_nftables_concat()) {
+		pr_err("kerndat_has_nftables_concat failed when initializing kerndat.\n");
+		return -1;
+	}
+	if (kerndat_has_move_mount_set_group()) {
+		pr_err("kerndat_has_move_mount_set_group failed when initializing kerndat.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
 int kerndat_init(void)
 {
 	int ret;
@@ -1499,7 +1947,16 @@ int kerndat_init(void)
 	memset(&kdat, 0, sizeof(kdat));
 
 	preload_socket_modules();
-	preload_netfilter_modules();
+	if (!opts.unprivileged)
+		/*
+		 * This uses 'iptables -L' to implicitly load necessary modules.
+		 * If the non nft backed iptables is used it does a
+		 * openat(AT_FDCWD, "/run/xtables.lock", O_RDONLY|O_CREAT, 0600) = -1 EACCES
+		 * which will fail as non-root. There are no capabilities to
+		 * change this. The iptables nft backend fails with
+		 * openat(AT_FDCWD, "/proc/net/ip_tables_names", O_RDONLY) = -1 EACCES
+		 */
+		preload_netfilter_modules();
 
 	if (check_pagemap()) {
 		pr_err("check_pagemap failed when initializing kerndat.\n");
@@ -1537,10 +1994,14 @@ int kerndat_init(void)
 		pr_err("get_ipv6 failed when initializing kerndat.\n");
 		ret = -1;
 	}
-	if (!ret && kerndat_loginuid()) {
-		pr_err("kerndat_loginuid failed when initializing kerndat.\n");
+	if (!ret && kerndat_nsid()) {
+		pr_err("kerndat_nsid failed when initializing kerndat.\n");
 		ret = -1;
 	}
+
+	if (!ret && root_only_init())
+		ret = -1;
+
 	if (!ret && kerndat_iptables_has_xtlocks()) {
 		pr_err("kerndat_iptables_has_xtlocks failed when initializing kerndat.\n");
 		ret = -1;
@@ -1553,22 +2014,6 @@ int kerndat_init(void)
 		pr_err("kerndat_compat_restore failed when initializing kerndat.\n");
 		ret = -1;
 	}
-	if (!ret && kerndat_tun_netns()) {
-		pr_err("kerndat_tun_netns failed when initializing kerndat.\n");
-		ret = -1;
-	}
-	if (!ret && kerndat_socket_unix_file()) {
-		pr_err("kerndat_socket_unix_file failed when initializing kerndat.\n");
-		ret = -1;
-	}
-	if (!ret && kerndat_nsid()) {
-		pr_err("kerndat_nsid failed when initializing kerndat.\n");
-		ret = -1;
-	}
-	if (!ret && kerndat_link_nsid()) {
-		pr_err("kerndat_link_nsid failed when initializing kerndat.\n");
-		ret = -1;
-	}
 	if (!ret && kerndat_has_memfd_create()) {
 		pr_err("kerndat_has_memfd_create failed when initializing kerndat.\n");
 		ret = -1;
@@ -1599,10 +2044,6 @@ int kerndat_init(void)
 		pr_err("kerndat_vdso_preserves_hint failed when initializing kerndat.\n");
 		ret = -1;
 	}
-	if (!ret && kerndat_socket_netns()) {
-		pr_err("kerndat_socket_netns failed when initializing kerndat.\n");
-		ret = -1;
-	}
 	if (!ret && kerndat_x86_has_ptrace_fpu_xsave_bug()) {
 		pr_err("kerndat_x86_has_ptrace_fpu_xsave_bug failed when initializing kerndat.\n");
 		ret = -1;
@@ -1627,7 +2068,7 @@ int kerndat_init(void)
 		pr_err("has_time_namespace failed when initializing kerndat.\n");
 		ret = -1;
 	}
-	if (!ret && kerndat_has_newifindex()) {
+	if (!ret && (!opts.unprivileged || has_cap_net_admin(opts.cap_eff)) && kerndat_has_newifindex()) {
 		pr_err("kerndat_has_newifindex failed when initializing kerndat.\n");
 		ret = -1;
 	}
@@ -1641,18 +2082,10 @@ int kerndat_init(void)
 		pr_err("kerndat_has_nspid failed when initializing kerndat.\n");
 		ret = -1;
 	}
-	if (!ret && kerndat_has_nftables_concat()) {
-		pr_err("kerndat_has_nftables_concat failed when initializing kerndat.\n");
-		ret = -1;
-	}
 	if (!ret && kerndat_sockopt_buf_lock()) {
 		pr_err("kerndat_sockopt_buf_lock failed when initializing kerndat.\n");
 		ret = -1;
 	}
-	if (!ret && kerndat_has_move_mount_set_group()) {
-		pr_err("kerndat_has_move_mount_set_group failed when initializing kerndat.\n");
-		ret = -1;
-	}
 	if (!ret && kerndat_has_openat2()) {
 		pr_err("kerndat_has_openat2 failed when initializing kerndat.\n");
 		ret = -1;
@@ -1661,10 +2094,38 @@ int kerndat_init(void)
 		pr_err("kerndat_has_rseq failed when initializing kerndat.\n");
 		ret = -1;
 	}
-	if (!ret && kerndat_has_ptrace_get_rseq_conf()) {
+	if (!ret && (kerndat_has_ptrace_get_rseq_conf() < 0)) {
 		pr_err("kerndat_has_ptrace_get_rseq_conf failed when initializing kerndat.\n");
 		ret = -1;
 	}
+	if (!ret && (kerndat_has_ipv6_freebind() < 0)) {
+		pr_err("kerndat_has_ipv6_freebind failed when initializing kerndat.\n");
+		ret = -1;
+	}
+	if (!ret && kerndat_has_membarrier_get_registrations()) {
+		pr_err("kerndat_has_membarrier_get_registrations failed when initializing kerndat.\n");
+		ret = -1;
+	}
+	if (!ret && kerndat_has_shstk()) {
+		pr_err("kerndat_has_shstk failed when initializing kerndat.\n");
+		ret = -1;
+	}
+	if (!ret && kerndat_has_close_range()) {
+		pr_err("kerndat_has_close_range has failed when initializing kerndat.\n");
+		ret = -1;
+	}
+	if (!ret && kerndat_has_timer_cr_ids()) {
+		pr_err("kerndat_has_timer_cr_ids has failed when initializing kerndat.\n");
+		ret = -1;
+	}
+	if (!ret && kerndat_breakpoints()) {
+		pr_err("kerndat_breakpoints has failed when initializing kerndat.\n");
+		ret = -1;
+	}
+	if (!ret && kerndat_has_madv_guard()) {
+		pr_err("kerndat_has_madv_guard has failed when initializing kerndat.\n");
+		ret = -1;
+	}
 
 	kerndat_lsm();
 	kerndat_mmap_min_addr();
diff --git a/criu/libnetlink.c b/criu/libnetlink.c
index f0304b0db..c7a84a44d 100644
--- a/criu/libnetlink.c
+++ b/criu/libnetlink.c
@@ -214,8 +214,3 @@ int __wrap_nlmsg_parse(struct nlmsghdr *nlh, int hdrlen, struct nlattr *tb[], in
 
 	return nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen), policy);
 }
-
-int32_t nla_get_s32(const struct nlattr *nla)
-{
-	return *(const int32_t *)nla_data(nla);
-}
diff --git a/criu/log.c b/criu/log.c
index c4ce90ec0..bf6f657f2 100644
--- a/criu/log.c
+++ b/criu/log.c
@@ -10,6 +10,7 @@
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <sys/utsname.h>
+#include <sys/mman.h>
 
 #include <fcntl.h>
 
@@ -71,7 +72,8 @@ static void print_ts(void)
 
 	gettimeofday(&t, NULL);
 	timediff(&start, &t);
-	snprintf(buffer, TS_BUF_OFF, "(%02u.%06u)", (unsigned)t.tv_sec, (unsigned)t.tv_usec);
+	snprintf(buffer, TS_BUF_OFF, "(%02u.%06u", (unsigned)t.tv_sec, (unsigned)t.tv_usec);
+	buffer[TS_BUF_OFF - 2] = ')'; /* this will overwrite the last digit if tv_sec>=100 */
 	buffer[TS_BUF_OFF - 1] = ' '; /* kill the '\0' produced by snprintf */
 }
 
@@ -113,6 +115,9 @@ static struct str_and_lock *first_err;
 
 int log_keep_err(void)
 {
+	if (first_err)
+		return 0;
+
 	first_err = shmalloc(sizeof(struct str_and_lock));
 	if (first_err == NULL)
 		return -1;
@@ -131,10 +136,11 @@ static void log_note_err(char *msg)
 		 * anyway, so it doesn't make much sense to try hard
 		 * and optimize this out.
 		 */
-		mutex_lock(&first_err->l);
-		if (first_err->s[0] == '\0')
-			strlcpy(first_err->s, msg, sizeof(first_err->s));
-		mutex_unlock(&first_err->l);
+		if (mutex_trylock(&first_err->l)) {
+			if (first_err->s[0] == '\0')
+				__strlcpy(first_err->s, msg, sizeof(first_err->s));
+			mutex_unlock(&first_err->l);
+		}
 	}
 }
 
@@ -184,7 +190,7 @@ void flush_early_log_buffer(int fd)
 		 * with reading the log_level.
 		 */
 		struct early_log_hdr *hdr = (void *)early_log_buffer + pos;
-		pos += sizeof(hdr);
+		pos += sizeof(*hdr);
 		if (hdr->level <= current_loglevel) {
 			size_t size = 0;
 			while (size < hdr->len) {
@@ -196,7 +202,7 @@ void flush_early_log_buffer(int fd)
 		}
 		pos += hdr->len;
 	}
-	if (early_log_buf_off == EARLY_LOG_BUF_LEN)
+	if ((early_log_buf_off + sizeof(struct early_log_hdr)) >= EARLY_LOG_BUF_LEN)
 		pr_warn("The early log buffer is full, some messages may have been lost\n");
 	early_log_buf_off = 0;
 }
@@ -314,10 +320,10 @@ unsigned int log_get_loglevel(void)
 
 static void early_vprint(const char *format, unsigned int loglevel, va_list params)
 {
-	unsigned int log_size = 0;
+	int log_size = 0, log_space;
 	struct early_log_hdr *hdr;
 
-	if ((early_log_buf_off + sizeof(hdr)) >= EARLY_LOG_BUF_LEN)
+	if ((early_log_buf_off + sizeof(*hdr)) >= EARLY_LOG_BUF_LEN)
 		return;
 
 	/* Save loglevel */
@@ -325,7 +331,8 @@ static void early_vprint(const char *format, unsigned int loglevel, va_list para
 	hdr = (void *)early_log_buffer + early_log_buf_off;
 	hdr->level = loglevel;
 	/* Skip the log entry size */
-	early_log_buf_off += sizeof(hdr);
+	early_log_buf_off += sizeof(*hdr);
+	log_space = EARLY_LOG_BUF_LEN - early_log_buf_off;
 	if (loglevel >= LOG_TIMESTAMP) {
 		/*
 		 * If logging is not yet setup we just write zeros
@@ -333,12 +340,17 @@ static void early_vprint(const char *format, unsigned int loglevel, va_list para
 		 * keep the same format as the other messages on
 		 * log levels with timestamps (>=LOG_TIMESTAMP).
 		 */
-		log_size = snprintf(early_log_buffer + early_log_buf_off, sizeof(early_log_buffer) - early_log_buf_off,
+		log_size = snprintf(early_log_buffer + early_log_buf_off, log_space,
 				    "(00.000000) ");
 	}
 
-	log_size += vsnprintf(early_log_buffer + early_log_buf_off + log_size,
-			      sizeof(early_log_buffer) - early_log_buf_off - log_size, format, params);
+	if (log_size < log_space)
+		log_size += vsnprintf(early_log_buffer + early_log_buf_off + log_size,
+				      log_space - log_size, format, params);
+	if (log_size > log_space) {
+		/* vsnprintf always adds the terminating null byte. */
+		log_size = log_space - 1;
+	}
 
 	/* Save log entry size */
 	hdr->len = log_size;
@@ -397,15 +409,28 @@ void print_on_level(unsigned int loglevel, const char *format, ...)
 
 int write_pidfile(int pid)
 {
-	int fd;
+	int fd, ret, exit_code = -1;
 
 	fd = open(opts.pidfile, O_WRONLY | O_EXCL | O_CREAT, 0600);
 	if (fd == -1) {
-		pr_perror("Can't open %s", opts.pidfile);
+		pr_perror("pidfile: Can't open %s", opts.pidfile);
 		return -1;
 	}
 
-	dprintf(fd, "%d", pid);
+	ret = dprintf(fd, "%d", pid);
+	if (ret < 0) {
+		pr_perror("pidfile: Can't write pid %d to %s", pid, opts.pidfile);
+		goto close;
+	}
+
+	if (ret == 0) {
+		pr_err("pidfile: Can't write pid %d to %s\n", pid, opts.pidfile);
+		goto close;
+	}
+
+	pr_debug("pidfile: Wrote pid %d to %s (%d bytes)\n", pid, opts.pidfile, ret);
+	exit_code = 0;
+close:
 	close(fd);
-	return 0;
+	return exit_code;
 }
diff --git a/criu/lsm.c b/criu/lsm.c
index d1b73cc79..5faf3e5b2 100644
--- a/criu/lsm.c
+++ b/criu/lsm.c
@@ -29,7 +29,9 @@ static int apparmor_get_label(pid_t pid, char **profile_name)
 	FILE *f;
 	char *space;
 
-	f = fopen_proc(pid, "attr/current");
+	f = fopen_proc(pid, "attr/apparmor/current");
+	if (!f)
+		f = fopen_proc(pid, "attr/current");
 	if (!f)
 		return -1;
 
@@ -370,7 +372,7 @@ int render_lsm_profile(char *profile, char **val)
 	case LSMTYPE__APPARMOR:
 		return render_aa_profile(val, profile);
 	case LSMTYPE__SELINUX:
-		if (asprintf(val, "%s", profile) < 0) {
+		if (asprintf(val, "%s", opts.lsm_supplied ? opts.lsm_profile : profile) < 0) {
 			*val = NULL;
 			return -1;
 		}
diff --git a/criu/mem.c b/criu/mem.c
index 136439518..9e8740c07 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -10,6 +10,7 @@
 #include "cr_options.h"
 #include "servicefd.h"
 #include "mem.h"
+#include "mman.h"
 #include "parasite-syscall.h"
 #include "parasite.h"
 #include "page-pipe.h"
@@ -99,7 +100,7 @@ static inline bool __page_in_parent(bool dirty)
 	return opts.track_mem && opts.img_parent && !dirty;
 }
 
-bool should_dump_page(VmaEntry *vmae, u64 pme)
+static bool should_dump_entire_vma(VmaEntry *vmae)
 {
 	/*
 	 * vDSO area must be always dumped because on restore
@@ -107,30 +108,83 @@ bool should_dump_page(VmaEntry *vmae, u64 pme)
 	 */
 	if (vma_entry_is(vmae, VMA_AREA_VDSO))
 		return true;
-	/*
-	 * In turn VVAR area is special and referenced from
-	 * vDSO area by IP addressing (at least on x86) thus
-	 * never ever dump its content but always use one provided
-	 * by the kernel on restore, ie runtime VVAR area must
-	 * be remapped into proper place..
-	 */
-	if (vma_entry_is(vmae, VMA_AREA_VVAR))
-		return false;
-
-	/*
-	 * Optimisation for private mapping pages, that haven't
-	 * yet being COW-ed
-	 */
-	if (vma_entry_is(vmae, VMA_FILE_PRIVATE) && (pme & PME_FILE))
-		return false;
 	if (vma_entry_is(vmae, VMA_AREA_AIORING))
 		return true;
-	if ((pme & (PME_PRESENT | PME_SWAP)) && !__page_is_zero(pme))
-		return true;
 
 	return false;
 }
 
+/*
+ * Sets page_info->next to vaddr (and page_info->softdirty) if the page at vaddr has to be
+ * dumped; otherwise to the next address to inspect. Returns 0 on success, -1 on failure.
+ */
+int should_dump_page(pmc_t *pmc, VmaEntry *vmae, u64 vaddr, struct page_info *page_info)
+{
+	if (!page_info)
+		goto err;
+
+	if (vaddr >= pmc->end && pmc_fill(pmc, vaddr, vmae->end))
+		goto err;
+
+	if (pmc->regs) {
+		while (1) {
+			if (pmc->regs_idx == pmc->regs_len) {
+				page_info->next = pmc->end;
+				return 0;
+			}
+
+			if (vaddr < pmc->regs[pmc->regs_idx].end)
+				break;
+			pmc->regs_idx++;
+		}
+		/* vaddr lies in a gap before the current region: resume at that region's start. */
+		if (vaddr < pmc->regs[pmc->regs_idx].start) {
+			page_info->next = pmc->regs[pmc->regs_idx].start;
+			return 0;
+		}
+
+		if (pmc->regs[pmc->regs_idx].categories & PAGE_IS_GUARD)
+			goto skip_guard_page;
+
+		page_info->softdirty = pmc->regs[pmc->regs_idx].categories & PAGE_IS_SOFT_DIRTY;
+		page_info->next = vaddr;
+		return 0;
+	} else {
+		u64 pme = pmc->map[PAGE_PFN(vaddr - pmc->start)];
+
+		if (pme & PME_GUARD_REGION)
+			goto skip_guard_page;
+
+		/*
+		 * Optimisation for private mapping pages that haven't
+		 * yet been COW-ed
+		 */
+		if (vma_entry_is(vmae, VMA_FILE_PRIVATE) && (pme & PME_FILE)) {
+			page_info->next = vaddr + PAGE_SIZE;
+			return 0;
+		}
+
+		if ((pme & (PME_PRESENT | PME_SWAP)) && !__page_is_zero(pme)) {
+			page_info->softdirty = pme & PME_SOFT_DIRTY;
+			page_info->next = vaddr;
+			return 0;
+		}
+
+		page_info->next = vaddr + PAGE_SIZE;
+		return 0;
+	}
+
+err:
+	pr_err("should_dump_page failed on vma "
+	       "%#016" PRIx64 "-%#016" PRIx64 " vaddr=%#016" PRIx64 "\n",
+	       vmae->start, vmae->end, vaddr);
+	return -1;
+
+skip_guard_page:
+	page_info->next = vaddr + PAGE_SIZE;
+	return 0;
+}
+
 bool page_is_zero(u64 pme)
 {
 	return __page_is_zero(pme);
@@ -161,28 +215,34 @@ static bool is_stack(struct pstree_item *item, unsigned long vaddr)
  * put the memory into the page-pipe's pipe.
  *
  * "Holes" in page-pipe are regions, that should be dumped, but
- * the memory contents is present in the pagent image set.
+ * the memory contents is present in the parent image set.
  */
 
-static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct page_pipe *pp, u64 *map, u64 *off,
+static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct page_pipe *pp, pmc_t *pmc, u64 *pvaddr,
 			 bool has_parent)
 {
-	u64 *at = &map[PAGE_PFN(*off)];
-	unsigned long pfn, nr_to_scan;
+	unsigned long nr_scanned;
 	unsigned long pages[3] = {};
+	unsigned long vaddr;
+	bool dump_all_pages;
 	int ret = 0;
 
-	nr_to_scan = (vma_area_len(vma) - *off) / PAGE_SIZE;
+	dump_all_pages = should_dump_entire_vma(vma->e);
 
-	for (pfn = 0; pfn < nr_to_scan; pfn++) {
-		unsigned long vaddr;
+	nr_scanned = 0;
+	for (vaddr = *pvaddr; vaddr < vma->e->end; vaddr += PAGE_SIZE, nr_scanned++) {
 		unsigned int ppb_flags = 0;
+		struct page_info page_info = {};
 		int st;
 
-		if (!should_dump_page(vma->e, at[pfn]))
-			continue;
+		/* If dump_all_pages is true, should_dump_page is called to get pme. */
+		if (should_dump_page(pmc, vma->e, vaddr, &page_info))
+			return -1;
 
-		vaddr = vma->e->start + *off + pfn * PAGE_SIZE;
+		if (!dump_all_pages && page_info.next != vaddr) {
+			vaddr = page_info.next - PAGE_SIZE;
+			continue;
+		}
 
 		if (vma_entry_can_be_lazy(vma->e) && !is_stack(item, vaddr))
 			ppb_flags |= PPB_LAZY;
@@ -194,7 +254,7 @@ static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct
 		 * page. The latter would be checked in page-xfer.
 		 */
 
-		if (has_parent && page_in_parent(at[pfn] & PME_SOFT_DIRTY)) {
+		if (has_parent && page_in_parent(page_info.softdirty)) {
 			ret = page_pipe_add_hole(pp, vaddr, PP_HOLE_PARENT);
 			st = 0;
 		} else {
@@ -214,9 +274,8 @@ static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct
 		pages[st]++;
 	}
 
-	*off += pfn * PAGE_SIZE;
-
-	cnt_add(CNT_PAGES_SCANNED, nr_to_scan);
+	*pvaddr = vaddr;
+	cnt_add(CNT_PAGES_SCANNED, nr_scanned);
 	cnt_add(CNT_PAGES_SKIPPED_PARENT, pages[0]);
 	cnt_add(CNT_PAGES_LAZY, pages[1]);
 	cnt_add(CNT_PAGES_WRITTEN, pages[2]);
@@ -246,6 +305,12 @@ prep_dump_pages_args(struct parasite_ctl *ctl, struct vm_area_list *vma_area_lis
 		 */
 		if (vma_entry_is(vma->e, VMA_AREA_AIORING) && skip_non_trackable)
 			continue;
+		/*
+		 * We totally ignore MAP_HUGETLB on pre-dump.
+		 * See also generate_vma_iovs() comment.
+		 */
+		if ((vma->e->flags & MAP_HUGETLB) && skip_non_trackable)
+			continue;
 		if (vma->e->prot & PROT_READ)
 			continue;
 
@@ -271,7 +336,7 @@ static int drain_pages(struct page_pipe *pp, struct parasite_ctl *ctl, struct pa
 	list_for_each_entry(ppb, &pp->bufs, l) {
 		args->nr_segs = ppb->nr_segs;
 		args->nr_pages = ppb->pages_in;
-		pr_debug("PPB: %d pages %d segs %u pipe %d off\n", args->nr_pages, args->nr_segs, ppb->pipe_size,
+		pr_debug("PPB: %ld pages %d segs %u pipe %d off\n", args->nr_pages, args->nr_segs, ppb->pipe_size,
 			 args->off);
 
 		ret = compel_rpc_call(PARASITE_CMD_DUMPPAGES, ctl);
@@ -350,12 +415,31 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma, str
 			     struct page_xfer *xfer, struct parasite_dump_pages_args *args, struct parasite_ctl *ctl,
 			     pmc_t *pmc, bool has_parent, bool pre_dump, int parent_predump_mode)
 {
-	u64 off = 0;
-	u64 *map;
+	u64 vaddr;
 	int ret;
 
 	if (!vma_area_is_private(vma, kdat.task_size) && !vma_area_is(vma, VMA_ANON_SHARED))
 		return 0;
+	/*
+	 * In turn VVAR area is special and referenced from
+	 * vDSO area by IP addressing (at least on x86) thus
+	 * never ever dump its content but always use one provided
+	 * by the kernel on restore, ie runtime VVAR area must
+	 * be remapped into proper place..
+	 */
+	if (vma_entry_is(vma->e, VMA_AREA_VVAR))
+		return 0;
+
+	/*
+	 * 9651fcedf7b9 ("mm: add MAP_DROPPABLE for designating always lazily freeable mappings")
+	 * tells us that:
+	 * Under memory pressure, mm can just drop the pages (so that they're
+	 * zero when read back again).
+	 *
+	 * Let's just skip MAP_DROPPABLE mappings pages dump logic.
+	 */
+	if (vma->e->flags & MAP_DROPPABLE)
+		return 0;
 
 	/*
 	 * To facilitate any combination of pre-dump modes to run after
@@ -402,21 +486,27 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma, str
 			has_parent = false;
 	}
 
-	if (vma_entry_is(vma->e, VMA_AREA_AIORING)) {
+	/*
+	 * We want to completely ignore these VMA types on the pre-dump:
+	 * 1. VMA_AREA_AIORING because it is not soft-dirty trackable (kernel writes)
+	 * 2. MAP_HUGETLB mappings because they are not premapped and we can't use
+	 * parent images from pre-dump stages. Instead, the content is restored from
+	 * the parasite context using full memory image.
+	 */
+	if (vma_entry_is(vma->e, VMA_AREA_AIORING) || vma->e->flags & MAP_HUGETLB) {
 		if (pre_dump)
 			return 0;
 		has_parent = false;
 	}
 
-	map = pmc_get_map(pmc, vma);
-	if (!map)
+	if (pmc_get_map(pmc, vma))
 		return -1;
 
 	if (vma_area_is(vma, VMA_ANON_SHARED))
-		return add_shmem_area(item->pid->real, vma->e, map);
-
+		return add_shmem_area(item->pid->real, vma->e, pmc);
+	vaddr = vma->e->start;
 again:
-	ret = generate_iovs(item, vma, pp, map, &off, has_parent);
+	ret = generate_iovs(item, vma, pp, pmc, &vaddr, has_parent);
 	if (ret == -EAGAIN) {
 		BUG_ON(!(pp->flags & PP_CHUNK_MODE));
 
@@ -509,6 +599,9 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit
 		parent_predump_mode = mdc->parent_ie->pre_dump_mode;
 
 	list_for_each_entry(vma_area, &vma_area_list->h, list) {
+		if (vma_area_is(vma_area, VMA_AREA_GUARD))
+			continue;
+
 		ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl, &pmc, has_parent, mdc->pre_dump,
 					parent_predump_mode);
 		if (ret < 0)
@@ -769,14 +862,14 @@ static void prepare_cow_vmas_for(struct vm_area_list *vmas, struct vm_area_list
 		/* <= here to shift from matching VMAs and ... */
 		while (vma->e->start <= pvma->e->start) {
 			vma = vma_next(vma);
-			if (&vma->list == &vmas->h)
+			if ((&vma->list == &vmas->h) || vma_area_is(vma, VMA_AREA_GUARD))
 				return;
 		}
 
 		/* ... no == here since we must stop on matching pair */
 		while (pvma->e->start < vma->e->start) {
 			pvma = vma_next(pvma);
-			if (&pvma->list == &pvmas->h)
+			if ((&pvma->list == &pvmas->h) || vma_area_is(pvma, VMA_AREA_GUARD))
 				return;
 		}
 	}
@@ -835,6 +928,7 @@ static int premap_private_vma(struct pstree_item *t, struct vma_area *vma, void
 		vma->e->start -= PAGE_SIZE;
 
 	size = vma_entry_len(vma->e);
+
 	if (!vma_inherited(vma)) {
 		int flag = 0;
 		/*
@@ -910,6 +1004,15 @@ static int premap_private_vma(struct pstree_item *t, struct vma_area *vma, void
 
 static inline bool vma_force_premap(struct vma_area *vma, struct list_head *head)
 {
+	/*
+	 * Shadow stack VMAs cannot be mmap()ed, they must be created using
+	 * map_shadow_stack() system call.
+	 * Premap them to reserve virtual address space and populate them
+	 * to have their contents available for later copying.
+	 */
+	if (vma_area_is(vma, VMA_AREA_SHSTK))
+		return true;
+
 	/*
 	 * On kernels with 4K guard pages, growsdown VMAs
 	 * always have one guard page at the
@@ -960,6 +1063,9 @@ static int premap_priv_vmas(struct pstree_item *t, struct vm_area_list *vmas, vo
 	filemap_ctx_init(true);
 
 	list_for_each_entry(vma, &vmas->h, list) {
+		if (vma_area_is(vma, VMA_AREA_GUARD))
+			continue;
+
 		if (task_size_check(vpid(t), vma->e)) {
 			ret = -1;
 			break;
@@ -991,7 +1097,7 @@ static int premap_priv_vmas(struct pstree_item *t, struct vm_area_list *vmas, vo
 			do {
 				if (pr->pe->vaddr + pr->pe->nr_pages * PAGE_SIZE <= vma->e->start)
 					continue;
-				if (pr->pe->vaddr > vma->e->end)
+				if (pr->pe->vaddr >= vma->e->end)
 					vma->e->status |= VMA_NO_PROT_WRITE;
 				break;
 			} while (pr->advance(pr));
@@ -1021,6 +1127,7 @@ static int restore_priv_vma_content(struct pstree_item *t, struct page_read *pr)
 	unsigned int nr_shared = 0;
 	unsigned int nr_dropped = 0;
 	unsigned int nr_compared = 0;
+	unsigned int nr_enqueued = 0;
 	unsigned int nr_lazy = 0;
 	unsigned long va;
 
@@ -1096,7 +1203,8 @@ static int restore_priv_vma_content(struct pstree_item *t, struct page_read *pr)
 				len >>= PAGE_SHIFT;
 				nr_restored += len;
 				i += len - 1;
-				pr_debug("Enqueue page-read\n");
+
+				nr_enqueued++;
 				continue;
 			}
 
@@ -1165,6 +1273,9 @@ err_read:
 		unsigned long size, i = 0;
 		void *addr = decode_pointer(vma->premmaped_addr);
 
+		if (vma_area_is(vma, VMA_AREA_GUARD))
+			continue;
+
 		if (!vma_inherited(vma))
 			continue;
 
@@ -1192,7 +1303,8 @@ err_read:
 
 	pr_info("nr_restored_pages: %d\n", nr_restored);
 	pr_info("nr_shared_pages:   %d\n", nr_shared);
-	pr_info("nr_dropped_pages:   %d\n", nr_dropped);
+	pr_info("nr_dropped_pages:  %d\n", nr_dropped);
+	pr_info("nr_enqueued:       %d\n", nr_enqueued);
 	pr_info("nr_lazy:           %d\n", nr_lazy);
 
 	return 0;
@@ -1204,8 +1316,6 @@ err_addr:
 
 static int maybe_disable_thp(struct pstree_item *t, struct page_read *pr)
 {
-	MmEntry *mm = rsti(t)->mm;
-
 	/*
 	 * There is no need to disable it if the page read doesn't
 	 * have parent. In this case VMA will be empty until
@@ -1228,8 +1338,6 @@ static int maybe_disable_thp(struct pstree_item *t, struct page_read *pr)
 		pr_perror("Cannot disable THP");
 		return -1;
 	}
-	if (!(mm->has_thp_disabled && mm->thp_disabled))
-		rsti(t)->has_thp_enabled = true;
 
 	return 0;
 }
@@ -1431,3 +1539,72 @@ int prepare_vmas(struct pstree_item *t, struct task_restore_args *ta)
 
 	return prepare_vma_ios(t, ta);
 }
+
+/*
+ * Append a VMA_AREA_GUARD entry to vma_area_list for every guard region that
+ * PAGEMAP_SCAN reports for the task. Returns 0 on success, -1 on failure.
+ */
+int collect_madv_guards(pid_t pid, struct vm_area_list *vma_area_list)
+{
+	int pagemap_fd = -1;
+	struct page_region *regs = NULL;
+	long regs_len = 0;
+	int i, ret = -1;
+	struct pm_scan_arg args = {
+		.size = sizeof(struct pm_scan_arg),
+		.flags = 0,
+		.start = 0,
+		.end = kdat.task_size,
+		.walk_end = 0,
+		.vec_len = 1000, /* this should be enough for most cases */
+		.max_pages = 0,
+		.category_mask = PAGE_IS_GUARD,
+		.return_mask = PAGE_IS_GUARD,
+	};
+
+	if (!kdat.has_pagemap_scan_guard_pages)
+		return 0;
+
+	pagemap_fd = open_proc(pid, "pagemap");
+	if (pagemap_fd < 0)
+		goto out;
+
+	regs = xmalloc(args.vec_len * sizeof(struct page_region));
+	if (!regs)
+		goto out;
+	args.vec = (unsigned long)regs; /* unsigned: no sign extension on 32-bit */
+
+	do {
+		/* start from where we finished the last time */
+		args.start = args.walk_end;
+		regs_len = ioctl(pagemap_fd, PAGEMAP_SCAN, &args);
+		if (regs_len == -1) {
+			pr_perror("PAGEMAP_SCAN");
+			goto out;
+		}
+
+		for (i = 0; i < regs_len; i++) {
+			struct vma_area *vma;
+
+			BUG_ON(!(regs[i].categories & PAGE_IS_GUARD));
+
+			vma = alloc_vma_area();
+			if (!vma)
+				goto out;
+
+			vma->e->start = regs[i].start;
+			vma->e->end = regs[i].end;
+			vma->e->status = VMA_AREA_GUARD;
+
+			list_add_tail(&vma->list, &vma_area_list->h);
+			vma_area_list->nr++;
+		}
+	} while (args.walk_end != kdat.task_size);
+
+	ret = 0;
+out:
+	xfree(regs);
+	if (pagemap_fd >= 0)
+		close(pagemap_fd);
+	return ret;
+}
diff --git a/criu/memfd.c b/criu/memfd.c
index da2937703..9d9f0621f 100644
--- a/criu/memfd.c
+++ b/criu/memfd.c
@@ -46,6 +46,7 @@ struct memfd_restore_inode {
 	int fdstore_id;
 	unsigned int pending_seals;
 	MemfdInodeEntry *mie;
+	bool was_opened_rw;
 };
 
 static LIST_HEAD(memfd_inodes);
@@ -91,10 +92,21 @@ static int dump_memfd_inode(int fd, struct memfd_dump_inode *inode, const char *
 		mie.has_hugetlb_flag = true;
 		mie.hugetlb_flag = flag | MFD_HUGETLB;
 	}
+	mie.mode = st->st_mode;
+	mie.has_mode = true;
 
 	mie.seals = fcntl(fd, F_GET_SEALS);
-	if (mie.seals == -1)
-		goto out;
+	if (mie.seals == -1) {
+		if (errno != EINVAL || ~mie.hugetlb_flag & MFD_HUGETLB) {
+			pr_perror("fcntl(F_GET_SEALS)");
+			goto out;
+		}
+		/* Kernels before 4.16 don't allow MFD_HUGETLB |
+		 * MFD_ALLOW_SEALING and return EINVAL for
+		 * fcntl(MFD_HUGETLB-enabled fd).
+		 */
+		mie.seals = F_SEAL_SEAL;
+	}
 
 	if (pb_write_one(img_from_set(glob_imgset, CR_FD_MEMFD_INODE), &mie, PB_MEMFD_INODE))
 		goto out;
@@ -222,6 +234,7 @@ static int collect_one_memfd_inode(void *o, ProtobufCMessage *base, struct cr_im
 	mutex_init(&inode->lock);
 	inode->fdstore_id = -1;
 	inode->pending_seals = 0;
+	inode->was_opened_rw = false;
 
 	list_add_tail(&inode->list, &memfd_inodes);
 
@@ -270,8 +283,13 @@ static int memfd_open_inode_nocache(struct memfd_restore_inode *inode)
 	if (restore_memfd_shmem_content(fd, mie->shmid, mie->size))
 		goto out;
 
-	if (fchown(fd, mie->uid, mie->gid)) {
-		pr_perror("Can't change uid %d gid %d of memfd:%s", (int)mie->uid, (int)mie->gid, mie->name);
+	if (mie->has_mode)
+		ret = cr_fchperm(fd, mie->uid, mie->gid, mie->mode);
+	else
+		ret = cr_fchown(fd, mie->uid, mie->gid);
+	if (ret) {
+		pr_perror("Can't set permissions { uid %d gid %d mode %#o } of memfd:%s", (int)mie->uid,
+			  (int)mie->gid, mie->has_mode ? (int)mie->mode : -1, mie->name);
 		goto out;
 	}
 
@@ -305,7 +323,7 @@ static int memfd_open_inode(struct memfd_restore_inode *inode)
 	return fd;
 }
 
-int memfd_open(struct file_desc *d, u32 *fdflags)
+int memfd_open(struct file_desc *d, u32 *fdflags, bool filemap)
 {
 	struct memfd_info *mfi;
 	MemfdFileEntry *mfe;
@@ -315,57 +333,80 @@ int memfd_open(struct file_desc *d, u32 *fdflags)
 	mfi = container_of(d, struct memfd_info, d);
 	mfe = mfi->mfe;
 
-	if (inherited_fd(d, &fd))
-		return fd;
-
 	pr_info("Restoring memfd id=%d\n", mfe->id);
 
 	fd = memfd_open_inode(mfi->inode);
 	if (fd < 0)
-		goto err;
+		return -1;
 
 	/* Reopen the fd with original permissions */
 	flags = fdflags ? *fdflags : mfe->flags;
+
+	if (filemap && (flags & O_ACCMODE) == O_RDWR)
+		return fd;
+
+	if (!mfi->inode->was_opened_rw && (flags & O_ACCMODE) == O_RDWR) {
+		/*
+		 * If there is only a single RW-opened fd for a memfd, it can
+		 * be used to pass it to execveat() with AT_EMPTY_PATH to have
+		 * its contents executed.  This currently works only for the
+		 * original fd from memfd_create() so return the original fd
+		 * once -- in case the caller expects to be the sole opener
+		 * and does execveat() from this memfd.
+		 */
+		if (!fcntl(fd, F_SETFL, flags)) {
+			mfi->inode->was_opened_rw = true;
+			return fd;
+		}
+
+		pr_pwarn("Can't change fd flags to %#o for memfd id=%d", flags, mfe->id);
+	}
+
 	/*
 	 * Ideally we should call compat version open() to not force the
 	 * O_LARGEFILE file flag with regular open(). It doesn't seem that
 	 * important though.
 	 */
 	_fd = __open_proc(PROC_SELF, 0, flags, "fd/%d", fd);
-	if (_fd < 0) {
+	if (_fd < 0)
 		pr_perror("Can't reopen memfd id=%d", mfe->id);
-		goto err;
-	}
+	else if (!filemap && (flags & O_ACCMODE) == O_RDWR)
+		pr_warn("execveat(fd=%d, ..., AT_EMPTY_PATH) might fail after restore; memfd id=%d\n", _fd, mfe->id);
+
 	close(fd);
-	fd = _fd;
+	return _fd;
+}
+
+static int memfd_open_fe_fd(struct file_desc *d, int *new_fd)
+{
+	MemfdFileEntry *mfe;
+	int fd;
+
+	if (inherited_fd(d, new_fd))
+		return 0;
+
+	fd = memfd_open(d, NULL, false);
+	if (fd < 0)
+		return -1;
+
+	mfe = container_of(d, struct memfd_info, d)->mfe;
 
 	if (restore_fown(fd, mfe->fown) < 0)
 		goto err;
 
 	if (lseek(fd, mfe->pos, SEEK_SET) < 0) {
-		pr_perror("Can't restore file position of memfd id=%d", mfe->id);
+		pr_perror("Can't restore file position of %d for memfd id=%d", fd, mfe->id);
 		goto err;
 	}
 
-	return fd;
+	*new_fd = fd;
+	return 0;
 
 err:
-	if (fd >= 0)
-		close(fd);
+	close(fd);
 	return -1;
 }
 
-static int memfd_open_fe_fd(struct file_desc *fd, int *new_fd)
-{
-	int tmp;
-
-	tmp = memfd_open(fd, NULL);
-	if (tmp < 0)
-		return -1;
-	*new_fd = tmp;
-	return 0;
-}
-
 static char *memfd_d_name(struct file_desc *d, char *buf, size_t s)
 {
 	MemfdInodeEntry *mie = NULL;
diff --git a/criu/mount-v2.c b/criu/mount-v2.c
index 5d53e9a22..1e33ac12a 100644
--- a/criu/mount-v2.c
+++ b/criu/mount-v2.c
@@ -443,6 +443,7 @@ err:
 /* Mounts root container mount. */
 static int do_mount_root_v2(struct mount_info *mi)
 {
+	unsigned long mflags = mi->flags & (~MS_PROPAGATE);
 	unsigned long flags = MS_BIND;
 	int fd;
 
@@ -477,6 +478,11 @@ static int do_mount_root_v2(struct mount_info *mi)
 		return -1;
 	}
 
+	if (mflags && mount(NULL, mi->plain_mountpoint, NULL, MS_REMOUNT | MS_BIND | mflags, NULL)) {
+		pr_perror("Unable to apply root mount options");
+		return -1;
+	}
+
 	mi->mounted = true;
 
 	return 0;
@@ -927,8 +933,12 @@ static int move_mount_set_group(int src_id, char *source, int dst_id)
 
 static int restore_one_sharing(struct sharing_group *sg, struct mount_info *target)
 {
+	int nsfd = -1, orig_nsfd = -1, exit_code = -1;
 	char target_path[PATH_MAX];
-	int target_fd;
+	int target_fd = -1;
+
+	if (!sg->master_id && !sg->shared_id)
+		return 0;
 
 	target_fd = fdstore_get(target->mnt_fd_id);
 	BUG_ON(target_fd < 0);
@@ -943,8 +953,7 @@ static int restore_one_sharing(struct sharing_group *sg, struct mount_info *targ
 			first = get_first_mount(sg->parent);
 			if (move_mount_set_group(first->mnt_fd_id, NULL, target->mnt_fd_id)) {
 				pr_err("Failed to copy sharing from %d to %d\n", first->mnt_id, target->mnt_id);
-				close(target_fd);
-				return -1;
+				goto err;
 			}
 		} else {
 			/*
@@ -956,16 +965,23 @@ static int restore_one_sharing(struct sharing_group *sg, struct mount_info *targ
 			 */
 			if (move_mount_set_group(-1, sg->source, target->mnt_fd_id)) {
 				pr_err("Failed to copy sharing from source %s to %d\n", sg->source, target->mnt_id);
-				close(target_fd);
-				return -1;
+				goto err;
 			}
 		}
+	}
 
+	nsfd = fdstore_get(target->nsid->mnt.nsfd_id);
+	if (nsfd < 0)
+		goto err;
+
+	if (switch_ns_by_fd(nsfd, &mnt_ns_desc, &orig_nsfd))
+		goto err;
+
+	if (sg->master_id) {
 		/* Convert shared_id to master_id */
 		if (mount(NULL, target_path, NULL, MS_SLAVE, NULL)) {
 			pr_perror("Failed to make mount %d slave", target->mnt_id);
-			close(target_fd);
-			return -1;
+			goto err;
 		}
 	}
 
@@ -973,13 +989,16 @@ static int restore_one_sharing(struct sharing_group *sg, struct mount_info *targ
 	if (sg->shared_id) {
 		if (mount(NULL, target_path, NULL, MS_SHARED, NULL)) {
 			pr_perror("Failed to make mount %d shared", target->mnt_id);
-			close(target_fd);
-			return -1;
+			goto err;
 		}
 	}
-	close(target_fd);
-
-	return 0;
+	exit_code = 0;
+err:
+	close_safe(&target_fd);
+	close_safe(&nsfd);
+	if (orig_nsfd >= 0 && restore_ns(orig_nsfd, &mnt_ns_desc))
+		exit_code = -1;
+	return exit_code;
 }
 
 static int restore_one_sharing_group(struct sharing_group *sg)
diff --git a/criu/mount.c b/criu/mount.c
index 115e3d067..b643a7f26 100644
--- a/criu/mount.c
+++ b/criu/mount.c
@@ -98,7 +98,7 @@ static char *ext_mount_lookup(char *key)
 	int len = strlen(key);
 	char mkey[len + 6];
 
-	sprintf(mkey, "mnt[%s]", key);
+	snprintf(mkey, sizeof(mkey), "mnt[%s]", key);
 	v = external_lookup_by_key(mkey);
 	if (IS_ERR(v))
 		v = NULL;
@@ -826,7 +826,7 @@ static struct ns_id *find_ext_ns_id(void)
 
 	for (ns = ns_ids; ns->next; ns = ns->next)
 		if (ns->type == NS_CRIU && ns->nd == &mnt_ns_desc) {
-			if (!ns->mnt.mntinfo_list && !collect_mntinfo(ns, true))
+			if (!ns->mnt.mntinfo_list && !collect_mntinfo(ns, false))
 				break;
 			return ns;
 		}
@@ -888,7 +888,11 @@ static int resolve_external_mounts(struct mount_info *info)
 
 		cut_root = cut_root_for_bind(m->root, match->root);
 
-		p = xsprintf("%s/%s", match->ns_mountpoint + 1, cut_root);
+		if (cut_root[0] == '\0') {
+			p = xstrdup(match->ns_mountpoint + 1);
+		} else {
+			p = xsprintf("%s/%s", match->ns_mountpoint + 1, cut_root);
+		}
 		if (!p)
 			return -1;
 
@@ -1197,8 +1201,8 @@ int __check_mountpoint_fd(struct mount_info *pm, int mnt_fd, bool parse_mountinf
 		    dev == pm->s_dev_rt)
 			return 0;
 
-		pr_err("The file system %#x %#x (%#x) %s %s is inaccessible\n", pm->s_dev, pm->s_dev_rt, dev,
-		       pm->fstype->name, pm->ns_mountpoint);
+		pr_warn("The file system %#x %#x (%#x) %s %s is inaccessible\n", pm->s_dev, pm->s_dev_rt, dev,
+		        pm->fstype->name, pm->ns_mountpoint);
 		return -1;
 	}
 
@@ -1239,12 +1243,16 @@ int __open_mountpoint(struct mount_info *pm)
 int open_mount(unsigned int s_dev)
 {
 	struct mount_info *m;
+	int mnt_fd;
 
 	m = lookup_mnt_sdev(s_dev);
 	if (!m)
 		return -ENOENT;
 
-	return __open_mountpoint(m);
+	mnt_fd = __open_mountpoint(m);
+	if (mnt_fd < 0)
+		pr_err("Can't open mount %#x\n", s_dev);
+	return mnt_fd;
 }
 
 /* Bind-mount a mount point in a temporary place without children */
@@ -1723,38 +1731,49 @@ err:
 	return NULL;
 }
 
-/* Returns 1 in case of success, -errno in case of mount fail, and 0 on other errors */
+/*
+ * Returns:
+ *  0 - success
+ * -1 - error
+ *  1 - skip
+ */
 static __maybe_unused int mount_cr_time_mount(struct ns_id *ns, unsigned int *s_dev, const char *source,
 					      const char *target, const char *type)
 {
-	int mnt_fd, cwd_fd, ret, exit_code = 0;
+	int mnt_fd, cwd_fd, exit_code = -1;
 	struct stat st;
 
-	ret = switch_mnt_ns(ns->ns_pid, &mnt_fd, &cwd_fd);
-	if (ret < 0) {
+	if (switch_mnt_ns(ns->ns_pid, &mnt_fd, &cwd_fd)) {
 		pr_err("Can't switch mnt_ns\n");
-		goto out;
+		return -1;
 	}
 
-	ret = mount(source, target, type, 0, NULL);
-	if (ret < 0) {
-		pr_perror("Unable to mount %s %s", source, target);
-		exit_code = -errno;
-		goto restore_ns;
-	} else {
-		if (stat(target, &st) < 0) {
-			pr_perror("Can't stat %s", target);
-			exit_code = 0;
-		} else {
-			*s_dev = MKKDEV(major(st.st_dev), minor(st.st_dev));
+	if (mount(source, target, type, 0, NULL)) {
+		switch (errno) {
+		case EPERM:
+		case EBUSY:
+		case ENODEV:
+		case ENOENT:
+			pr_debug("Skipping %s as was unable to mount it: %s\n", type, strerror(errno));
 			exit_code = 1;
+			break;
+		default:
+			pr_perror("Unable to mount %s %s %s", type, source, target);
 		}
+		goto restore_ns;
 	}
 
+	if (stat(target, &st)) {
+		pr_perror("Can't stat %s", target);
+		goto restore_ns;
+	}
+
+	*s_dev = MKKDEV(major(st.st_dev), minor(st.st_dev));
+	exit_code = 0;
 restore_ns:
-	ret = restore_mnt_ns(mnt_fd, &cwd_fd);
-out:
-	return ret < 0 ? 0 : exit_code;
+	if (restore_mnt_ns(mnt_fd, &cwd_fd))
+		exit_code = -1;
+	return exit_code;
 }
 
 static int dump_one_fs(struct mount_info *mi)
@@ -2675,9 +2694,16 @@ shared:
 
 static int do_mount_root(struct mount_info *mi)
 {
+	unsigned long mflags = mi->flags & (~MS_PROPAGATE);
+
 	if (restore_shared_options(mi, !mi->shared_id && !mi->master_id, mi->shared_id, mi->master_id))
 		return -1;
 
+	if (mflags && mount(NULL, service_mountpoint(mi), NULL, MS_REMOUNT | MS_BIND | mflags, NULL)) {
+		pr_perror("Unable to apply root mount options");
+		return -1;
+	}
+
 	return fetch_rt_stat(mi, service_mountpoint(mi));
 }
 
@@ -2812,7 +2838,7 @@ static LIST_HEAD(mnt_remap_list);
 static int remap_id;
 
 struct mnt_remap_entry {
-	struct mount_info *mi;	   /* child is remaped into the root yards */
+	struct mount_info *mi;	   /* child is remapped into the root yards */
 	struct mount_info *parent; /* the origin parent for the child*/
 	struct list_head node;
 };
@@ -3978,16 +4004,10 @@ int collect_mnt_namespaces(bool for_dump)
 
 		if (ns) {
 			ret = mount_cr_time_mount(ns, &s_dev, "binfmt_misc", "/" BINFMT_MISC_HOME, "binfmt_misc");
-			if (ret == -EPERM)
-				pr_info("Can't mount binfmt_misc: EPERM. Running in user_ns?\n");
-			else if (ret < 0 && ret != -EBUSY && ret != -ENODEV && ret != -ENOENT) {
-				pr_err("Can't mount binfmt_misc: %d %s\n", ret, strerror(-ret));
+			if (ret == -1) {
 				goto err;
-			} else if (ret == 0) {
-				ret = -1;
-				goto err;
-			} else if (ret > 0 && !add_cr_time_mount(ns->mnt.mntinfo_tree, "binfmt_misc", BINFMT_MISC_HOME,
-								 s_dev, false)) {
+			} else if (ret == 0 && !add_cr_time_mount(ns->mnt.mntinfo_tree, "binfmt_misc", BINFMT_MISC_HOME,
+								  s_dev, false)) {
 				ret = -1;
 				goto err;
 			}
diff --git a/criu/namespaces.c b/criu/namespaces.c
index 7356fe8c2..0c9b16a87 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -4,7 +4,6 @@
 #include <stdlib.h>
 #include <sys/prctl.h>
 #include <grp.h>
-#include <sys/socket.h>
 #include <sys/un.h>
 #include <stdarg.h>
 #include <signal.h>
@@ -28,6 +27,7 @@
 #include "cgroup.h"
 #include "fdstore.h"
 #include "kerndat.h"
+#include "util-caps.h"
 
 #include "protobuf.h"
 #include "util.h"
@@ -284,7 +284,6 @@ int restore_ns(int rst, struct ns_desc *nd)
 
 int switch_mnt_ns(int pid, int *rst, int *cwd_fd)
 {
-	int ret;
 	int fd;
 
 	if (!cwd_fd)
@@ -293,13 +292,12 @@ int switch_mnt_ns(int pid, int *rst, int *cwd_fd)
 	fd = open(".", O_PATH);
 	if (fd < 0) {
 		pr_perror("unable to open current directory");
-		return fd;
+		return -1;
 	}
 
-	ret = switch_ns(pid, &mnt_ns_desc, rst);
-	if (ret < 0) {
+	if (switch_ns(pid, &mnt_ns_desc, rst)) {
 		close(fd);
-		return ret;
+		return -1;
 	}
 
 	*cwd_fd = fd;
@@ -308,23 +306,22 @@ int switch_mnt_ns(int pid, int *rst, int *cwd_fd)
 
 int restore_mnt_ns(int rst, int *cwd_fd)
 {
-	int ret = -1;
+	int exit_code = -1;
 
-	ret = restore_ns(rst, &mnt_ns_desc);
-	if (ret < 0)
+	if (restore_ns(rst, &mnt_ns_desc))
 		goto err_restore;
 
-	if (cwd_fd) {
-		ret = fchdir(*cwd_fd);
-		if (ret)
-			pr_perror("unable to restore current directory");
+	if (cwd_fd && fchdir(*cwd_fd)) {
+		pr_perror("Unable to restore current directory");
+		goto err_restore;
 	}
 
+	exit_code = 0;
 err_restore:
 	if (cwd_fd)
 		close_safe(cwd_fd);
 
-	return ret;
+	return exit_code;
 }
 
 struct ns_id *ns_ids = NULL;
@@ -1012,36 +1009,31 @@ int dump_user_ns(pid_t pid, int ns_id)
 
 	ret = parse_id_map(pid, "uid_map", &e->uid_map);
 	if (ret < 0)
-		goto err;
+		/*
+		 * The uid_map and gid_map are cleaned up later in
+		 * free_userns_maps(), so error paths need not free them here.
+		 */
+		return -1;
+
 	e->n_uid_map = ret;
 
 	ret = parse_id_map(pid, "gid_map", &e->gid_map);
 	if (ret < 0)
-		goto err;
+		return -1;
 	e->n_gid_map = ret;
 
 	if (check_user_ns(pid))
-		goto err;
+		return -1;
 
 	img = open_image(CR_FD_USERNS, O_DUMP, ns_id);
 	if (!img)
-		goto err;
+		return -1;
 	ret = pb_write_one(img, e, PB_USERNS);
 	close_image(img);
 	if (ret < 0)
-		goto err;
+		return -1;
 
 	return 0;
-err:
-	if (e->uid_map) {
-		xfree(e->uid_map[0]);
-		xfree(e->uid_map);
-	}
-	if (e->gid_map) {
-		xfree(e->gid_map[0]);
-		xfree(e->gid_map);
-	}
-	return -1;
 }
 
 void free_userns_maps(void)
@@ -1217,20 +1209,9 @@ static int write_id_map(pid_t pid, UidGidExtent **extents, int n, char *id_map)
 	return 0;
 }
 
-struct unsc_msg {
-	struct msghdr h;
-	/*
-	 * 0th is the call address
-	 * 1st is the flags
-	 * 2nd is the optional (NULL in response) arguments
-	 */
-	struct iovec iov[3];
-	char c[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))];
-};
-
 static int usernsd_pid;
 
-static inline void unsc_msg_init(struct unsc_msg *m, uns_call_t *c, int *x, void *arg, size_t asize, int fd)
+inline void unsc_msg_init(struct unsc_msg *m, uns_call_t *c, int *x, void *arg, size_t asize, int fd, pid_t *pid)
 {
 	struct cmsghdr *ch;
 	struct ucred *ucred;
@@ -1268,7 +1249,10 @@ static inline void unsc_msg_init(struct unsc_msg *m, uns_call_t *c, int *x, void
 	ch->cmsg_type = SCM_CREDENTIALS;
 
 	ucred = (struct ucred *)CMSG_DATA(ch);
-	ucred->pid = getpid();
+	if (pid)
+		ucred->pid = *pid;
+	else
+		ucred->pid = getpid();
 	ucred->uid = getuid();
 	ucred->gid = getgid();
 
@@ -1283,7 +1267,7 @@ static inline void unsc_msg_init(struct unsc_msg *m, uns_call_t *c, int *x, void
 	}
 }
 
-static void unsc_msg_pid_fd(struct unsc_msg *um, pid_t *pid, int *fd)
+void unsc_msg_pid_fd(struct unsc_msg *um, pid_t *pid, int *fd)
 {
 	struct cmsghdr *ch;
 	struct ucred *ucred;
@@ -1321,7 +1305,7 @@ static int usernsd(int sk)
 		int flags, fd, ret;
 		pid_t pid;
 
-		unsc_msg_init(&um, &call, &flags, msg, sizeof(msg), 0);
+		unsc_msg_init(&um, &call, &flags, msg, sizeof(msg), 0, NULL);
 		if (recvmsg(sk, &um.h, 0) <= 0) {
 			pr_perror("uns: recv req error");
 			return -1;
@@ -1366,7 +1350,7 @@ static int usernsd(int sk)
 		else
 			fd = -1;
 
-		unsc_msg_init(&um, &call, &ret, NULL, 0, fd);
+		unsc_msg_init(&um, &call, &ret, NULL, 0, fd, NULL);
 		if (sendmsg(sk, &um.h, 0) <= 0) {
 			pr_perror("uns: send resp error");
 			return -1;
@@ -1417,7 +1401,7 @@ int __userns_call(const char *func_name, uns_call_t call, int flags, void *arg,
 
 	/* Send the request */
 
-	unsc_msg_init(&um, &call, &flags, arg, arg_size, fd);
+	unsc_msg_init(&um, &call, &flags, arg, arg_size, fd, NULL);
 	ret = sendmsg(sk, &um.h, 0);
 	if (ret <= 0) {
 		pr_perror("uns: send req error");
@@ -1432,7 +1416,7 @@ int __userns_call(const char *func_name, uns_call_t call, int flags, void *arg,
 
 	/* Get the response back */
 
-	unsc_msg_init(&um, &call, &res, NULL, 0, 0);
+	unsc_msg_init(&um, &call, &res, NULL, 0, 0, NULL);
 	ret = recvmsg(sk, &um.h, 0);
 	if (ret <= 0) {
 		pr_perror("uns: recv resp error");
@@ -1453,14 +1437,11 @@ out:
 	return ret;
 }
 
-static int start_usernsd(void)
+int start_unix_cred_daemon(pid_t *pid, int (*daemon_func)(int sk))
 {
 	int sk[2];
 	int one = 1;
 
-	if (!(root_ns_mask & CLONE_NEWUSER))
-		return 0;
-
 	/*
 	 * Seqpacket to
 	 *
@@ -1468,7 +1449,7 @@ static int start_usernsd(void)
 	 *    each other easily. Stream socket require manual
 	 *    messages boundaries.
 	 *
-	 * b) Make callers note the damon death by seeing the
+	 * b) Make callers note the daemon death by seeing the
 	 *    disconnected socket. In case of dgram socket
 	 *    callers would just get stuck in receiving the
 	 *    response.
@@ -1489,24 +1470,39 @@ static int start_usernsd(void)
 		return -1;
 	}
 
-	usernsd_pid = fork();
-	if (usernsd_pid < 0) {
-		pr_perror("Can't fork usernsd");
+	*pid = fork();
+	if (*pid < 0) {
+		pr_perror("Can't unix daemon");
 		close(sk[0]);
 		close(sk[1]);
 		return -1;
 	}
 
-	if (usernsd_pid == 0) {
+	if (*pid == 0) {
 		int ret;
-
 		close(sk[0]);
-		ret = usernsd(sk[1]);
+		ret = daemon_func(sk[1]);
 		exit(ret);
 	}
-
 	close(sk[1]);
-	if (install_service_fd(USERNSD_SK, sk[0]) < 0) {
+
+	return sk[0];
+}
+
+static int start_usernsd(void)
+{
+	int sk;
+
+	if (!(root_ns_mask & CLONE_NEWUSER))
+		return 0;
+
+	sk = start_unix_cred_daemon(&usernsd_pid, usernsd);
+	if (sk < 0) {
+		pr_err("failed to start usernsd\n");
+		return -1;
+	}
+
+	if (install_service_fd(USERNSD_SK, sk) < 0) {
 		kill(usernsd_pid, SIGKILL);
 		waitpid(usernsd_pid, NULL, 0);
 		return -1;
@@ -1623,10 +1619,12 @@ int collect_namespaces(bool for_dump)
 
 int prepare_userns_creds(void)
 {
-	/* UID and GID must be set after restoring /proc/PID/{uid,gid}_maps */
-	if (setuid(0) || setgid(0) || setgroups(0, NULL)) {
-		pr_perror("Unable to initialize id-s");
-		return -1;
+	if (!opts.unprivileged || has_cap_setuid(opts.cap_eff)) {
+		/* UID and GID must be set after restoring /proc/PID/{uid,gid}_maps */
+		if (setuid(0) || setgid(0) || setgroups(0, NULL)) {
+			pr_perror("Unable to initialize id-s");
+			return -1;
+		}
 	}
 
 	/*
diff --git a/criu/net.c b/criu/net.c
index 2eff519c5..e5775a328 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -51,6 +51,9 @@
 #include "images/netdev.pb-c.h"
 #include "images/inventory.pb-c.h"
 
+#undef LOG_PREFIX
+#define LOG_PREFIX "net: "
+
 #ifndef IFLA_NEW_IFINDEX
 #define IFLA_NEW_IFINDEX 49
 #endif
@@ -108,15 +111,18 @@ int read_ns_sys_file(char *path, char *buf, int len)
 	}
 
 	rlen = read(fd, buf, len);
+	if (rlen == -1)
+		pr_perror("Can't read ns' %s", path);
 	close(fd);
 
 	if (rlen == len) {
+		buf[0] = '\0';
 		pr_err("Too small buffer to read ns sys file %s\n", path);
 		return -1;
 	}
 
-	if (rlen > 0)
-		buf[rlen - 1] = '\0';
+	if (rlen >= 0)
+		buf[rlen] = '\0';
 
 	return rlen;
 }
@@ -353,22 +359,23 @@ static int ipv6_conf_op(char *tgt, SysctlEntry **conf, int n, int op, SysctlEntr
 	return net_conf_op(tgt, conf, n, op, "ipv6", req, path, ARRAY_SIZE(devconfs6), devconfs6, def_conf);
 }
 
-static int unix_conf_op(SysctlEntry ***rconf, size_t *n, int op)
+static int unix_conf_op(SysctlEntry ***rconf, size_t *pn, int op)
 {
 	int i, ret = -1, flags = 0;
 	char path[ARRAY_SIZE(unix_conf_entries)][MAX_CONF_UNIX_PATH] = {};
 	struct sysctl_req req[ARRAY_SIZE(unix_conf_entries)] = {};
 	SysctlEntry **conf = *rconf;
+	size_t n = *pn;
 
-	if (*n != ARRAY_SIZE(unix_conf_entries)) {
-		pr_err("unix: Unexpected entries in config (%zu %zu)\n", *n, ARRAY_SIZE(unix_conf_entries));
+	if (n != ARRAY_SIZE(unix_conf_entries)) {
+		pr_err("unix: Unexpected entries in config (%zu %zu)\n", n, ARRAY_SIZE(unix_conf_entries));
 		return -EINVAL;
 	}
 
 	if (opts.weak_sysctls || op == CTL_READ)
 		flags = CTL_FLAGS_OPTIONAL;
 
-	for (i = 0; i < *n; i++) {
+	for (i = 0; i < n; i++) {
 		snprintf(path[i], MAX_CONF_UNIX_PATH, CONF_UNIX_FMT, unix_conf_entries[i]);
 		req[i].name = path[i];
 		req[i].flags = flags;
@@ -384,7 +391,7 @@ static int unix_conf_op(SysctlEntry ***rconf, size_t *n, int op)
 		}
 	}
 
-	ret = sysctl_op(req, *n, op, CLONE_NEWNET);
+	ret = sysctl_op(req, n, op, CLONE_NEWNET);
 	if (ret < 0) {
 		pr_err("unix: Failed to %s %s/<confs>\n", (op == CTL_READ) ? "read" : "write", CONF_UNIX_BASE);
 		return -1;
@@ -393,7 +400,7 @@ static int unix_conf_op(SysctlEntry ***rconf, size_t *n, int op)
 	if (op == CTL_READ) {
 		bool has_entries = false;
 
-		for (i = 0; i < *n; i++) {
+		for (i = 0; i < n; i++) {
 			if (req[i].flags & CTL_FLAGS_HAS) {
 				conf[i]->has_iarg = true;
 				if (!has_entries)
@@ -406,7 +413,7 @@ static int unix_conf_op(SysctlEntry ***rconf, size_t *n, int op)
 		 * Unix conf is optional.
 		 */
 		if (!has_entries) {
-			*n = 0;
+			*pn = 0;
 			*rconf = NULL;
 		}
 	}
@@ -1398,7 +1405,7 @@ static int move_veth(const char *netdev, struct ns_id *ns, struct net_link *link
 	len_val = strlen(netdev);
 	if (len_val >= IFNAMSIZ)
 		return -1;
-	strlcpy(mvreq.ifnam, netdev, IFNAMSIZ);
+	__strlcpy(mvreq.ifnam, netdev, IFNAMSIZ);
 
 	ret = userns_call(move_veth_cb, 0, &mvreq, sizeof(mvreq), ns->net.ns_fd);
 	if (ret < 0)
@@ -1528,7 +1535,7 @@ static int changeflags(int s, char *name, short flags)
 {
 	struct ifreq ifr;
 
-	strlcpy(ifr.ifr_name, name, IFNAMSIZ);
+	__strlcpy(ifr.ifr_name, name, IFNAMSIZ);
 	ifr.ifr_flags = flags;
 
 	if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0) {
@@ -2039,10 +2046,10 @@ static inline int dump_iptables(struct cr_imgset *fds)
 	 * and iptables backend is nft to prevent duplicate dumps.
 	 */
 #if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1)
-	iptables_cmd = get_legacy_iptables_bin(false);
+	iptables_cmd = get_legacy_iptables_bin(false, false);
 
 	if (kdat.ipv6)
-		ip6tables_cmd = get_legacy_iptables_bin(true);
+		ip6tables_cmd = get_legacy_iptables_bin(true, false);
 #endif
 
 	if (!iptables_cmd) {
@@ -2121,6 +2128,117 @@ nft_ctx_free_out:
 }
 #endif
 
+static const char *ipv4_sysctl_entries[] = {
+	"ping_group_range",
+};
+
+#define IPV4_SYSCTL_BASE "net/ipv4"
+#define IPV4_SYSCTL_FMT IPV4_SYSCTL_BASE"/%s"
+#define MAX_IPV4_SYSCTL_OPT 32
+#define MAX_IPV4_SYSCTL_PATH (sizeof(IPV4_SYSCTL_FMT) + MAX_IPV4_SYSCTL_OPT - 2)
+#define MAX_STR_IPV4_SYSCTL_LEN 200
+
+static int ipv4_sysctls_op(SysctlEntry ***rsysctl, size_t *pn, int op)
+{
+	int i, ret = -1, flags = 0;
+	char path[ARRAY_SIZE(ipv4_sysctl_entries)][MAX_IPV4_SYSCTL_PATH] = {};
+	struct sysctl_req req[ARRAY_SIZE(ipv4_sysctl_entries)] = {};
+	SysctlEntry **sysctl = *rsysctl;
+	size_t n = *pn, ri; /* ri counts the requests actually queued in req[] */
+	/* Read/write (per op) the IPV4_SYSCTL_BASE sysctls; 0 on success, -EINVAL or -1 on error. */
+	if (n != ARRAY_SIZE(ipv4_sysctl_entries)) {
+		pr_err("ipv4: Unexpected entries in sysctl (%zu %zu)\n", n, ARRAY_SIZE(ipv4_sysctl_entries));
+		return -EINVAL;
+	}
+	/* Reads are always marked optional; writes only when opts.weak_sysctls is set. */
+	if (opts.weak_sysctls || op == CTL_READ)
+		flags = CTL_FLAGS_OPTIONAL;
+
+	for (i = 0, ri = 0; i < n; i++) {
+		snprintf(path[ri], MAX_IPV4_SYSCTL_PATH, IPV4_SYSCTL_FMT, ipv4_sysctl_entries[i]);
+		req[ri].name = path[ri];
+		req[ri].flags = flags;
+
+		switch (sysctl[i]->type) {
+		case SYSCTL_TYPE__CTL_STR:
+			req[ri].type = CTL_STR(MAX_STR_IPV4_SYSCTL_LEN);
+
+			/* skip write if have no value */
+			if (op == CTL_WRITE && !sysctl[i]->sarg)
+				continue; /* ri is not advanced, so this req[]/path[] slot is reused */
+
+			req[ri].arg = sysctl[i]->sarg;
+			break;
+		default:
+			pr_err("ipv4: Unknown sysctl type %d\n", sysctl[i]->type);
+			return -1;
+		}
+		ri++;
+	}
+
+	ret = sysctl_op(req, ri, op, CLONE_NEWNET); /* only the ri queued requests are submitted */
+	if (ret < 0) {
+		pr_err("ipv4: Failed to %s %s/<sysctls>\n", (op == CTL_READ) ? "read" : "write", IPV4_SYSCTL_BASE);
+		return -1;
+	}
+
+	if (op == CTL_READ) {
+		bool has_entries = false;
+
+		BUG_ON(ri != n); /* reads never skip an entry, so req[i] pairs with sysctl[i] */
+		for (i = 0; i < n; i++) {
+			if (req[i].flags & CTL_FLAGS_HAS) {
+				has_entries = true;
+			} else {
+				sysctl[i]->sarg = NULL; /* presumably: value was not read, so drop the buffer pointer */
+			}
+		}
+		/* No request came back with CTL_FLAGS_HAS: hand an empty array back to the caller. */
+		if (!has_entries) {
+			*pn = 0;
+			*rsysctl = NULL;
+		}
+	}
+
+	return 0;
+}
+
+/* Prepare a read ping_group_range entry for the image; ent->sarg holds @size bytes. Returns 0 or -1. */
+static int ipv4_sysctls_ping_group_range_map_gid(SysctlEntry *ent, size_t size)
+{
+	int start, end, ustart, uend, ret;
+
+	if (!ent->sarg) /* ipv4_sysctls_op() clears sarg for an entry it could not read */
+		return 0;
+
+	if (sscanf(ent->sarg, "%d %d", &start, &end) != 2) {
+		pr_err("Failed to parse ping_group_range: %s\n", ent->sarg);
+		return -1;
+	}
+
+	/* The default is "1 0": no group is allowed to create ICMP Echo sockets. */
+	if (start == 1 && end == 0) {
+		pr_debug("The ping_group_range is set to default, skipping it.\n");
+		ent->sarg = NULL;
+		return 0;
+	}
+
+	if (!(root_ns_mask & CLONE_NEWUSER))
+		return 0;
+
+	ustart = userns_gid(start);
+	uend = userns_gid(end);
+	pr_debug("Mapping ping_group_range %d %d to userns -> %d %d\n", start, end, ustart, uend);
+
+	ret = snprintf(ent->sarg, size, "%d\t%d\n", ustart, uend); /* rewritten in place */
+	if (ret < 0 || ret >= size) {
+		pr_err("Failed to map ping_group_range: %d\t%d\n", ustart, uend);
+		return -1;
+	}
+
+	return 0;
+}
+
 static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 {
 	void *buf, *o_buf;
@@ -2135,6 +2253,10 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 	int size6 = ARRAY_SIZE(devconfs6);
 	char def_stable_secret[MAX_STR_CONF_LEN + 1] = {};
 	char all_stable_secret[MAX_STR_CONF_LEN + 1] = {};
+	SysctlEntry *ipv4_sysctls = NULL;
+	size_t ipv4_sysctl_size = ARRAY_SIZE(ipv4_sysctl_entries);
+	char ping_group_range[MAX_STR_IPV4_SYSCTL_LEN + 1] = {};
+	int ping_group_range_id = -1;
 	NetnsId *ids;
 	struct netns_id *p;
 
@@ -2142,10 +2264,16 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 	list_for_each_entry(p, &ns->net.ids, node)
 		i++;
 
+	/*
+	 * Here we allocate one single big buffer for storing multiple arrays
+	 * of protobuf entries and pointers to entries in it and we later use
+	 * xptr_pull_s to claim a part of this buffer of proper size for each
+	 * particular array. Next we read data from sysctl files to those
+	 * arrays and then finally save them into images.
+	 */
 	o_buf = buf = xmalloc(i * (sizeof(NetnsId *) + sizeof(NetnsId)) +
-			      size4 * (sizeof(SysctlEntry *) + sizeof(SysctlEntry)) * 2 +
-			      size6 * (sizeof(SysctlEntry *) + sizeof(SysctlEntry)) * 2 +
-			      sizex * (sizeof(SysctlEntry *) + sizeof(SysctlEntry)));
+			      (2 * size4 + 2 * size6 + sizex + ipv4_sysctl_size) *
+			      (sizeof(SysctlEntry *) + sizeof(SysctlEntry)));
 	if (!buf)
 		goto out;
 
@@ -2210,6 +2338,22 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 		netns.unix_conf[i]->type = SYSCTL_TYPE__CTL_32;
 	}
 
+	netns.n_ipv4_sysctl = ipv4_sysctl_size;
+	netns.ipv4_sysctl = xptr_pull_s(&buf, ipv4_sysctl_size * sizeof(SysctlEntry *));
+	ipv4_sysctls = xptr_pull_s(&buf, ipv4_sysctl_size * sizeof(SysctlEntry));
+	for (i = 0; i < ipv4_sysctl_size; i++) {
+		sysctl_entry__init(&ipv4_sysctls[i]);
+		netns.ipv4_sysctl[i] = &ipv4_sysctls[i];
+		if (!strcmp(ipv4_sysctl_entries[i], "ping_group_range")) {
+			netns.ipv4_sysctl[i]->type = SYSCTL_TYPE__CTL_STR;
+			netns.ipv4_sysctl[i]->sarg = ping_group_range;
+			ping_group_range_id = i;
+		} else {
+			/* Need to handle this case when we have more sysctls */
+			BUG();
+		}
+	}
+
 	ret = ipv4_conf_op("default", netns.def_conf4, size4, CTL_READ, NULL);
 	if (ret < 0)
 		goto err_free;
@@ -2228,6 +2372,16 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
 	if (ret < 0)
 		goto err_free;
 
+	ret = ipv4_sysctls_op(&netns.ipv4_sysctl, &netns.n_ipv4_sysctl, CTL_READ);
+	if (ret < 0)
+		goto err_free;
+
+	BUG_ON(ping_group_range_id == -1);
+	ret = ipv4_sysctls_ping_group_range_map_gid(netns.ipv4_sysctl[ping_group_range_id],
+						    MAX_STR_IPV4_SYSCTL_LEN + 1);
+	if (ret < 0)
+		goto err_free;
+
 	ret = pb_write_one(img_from_set(fds, CR_FD_NETNS), &netns, PB_NETNS);
 err_free:
 	xfree(o_buf);
@@ -2360,9 +2514,19 @@ static int prepare_xtable_lock(void)
 
 static inline int restore_iptables(int pid)
 {
+	char *iptables_cmd = "iptables-restore";
+	char *ip6tables_cmd = "ip6tables-restore";
+	char comm[32];
 	int ret = -1;
 	struct cr_img *img;
 
+#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1)
+	iptables_cmd = get_legacy_iptables_bin(false, true);
+
+	if (kdat.ipv6)
+		ip6tables_cmd = get_legacy_iptables_bin(true, true);
+#endif
+
 	img = open_image(CR_FD_IPTABLES, O_RSTR, pid);
 	if (img == NULL)
 		return -1;
@@ -2372,7 +2536,19 @@ static inline int restore_iptables(int pid)
 		goto ipt6;
 	}
 
-	ret = run_iptables_tool("iptables-restore -w", img_raw_fd(img), -1);
+	if (!iptables_cmd) {
+		pr_err("Can't restore iptables dump - no legacy version present\n");
+		close_image(img);
+		return -1;
+	}
+
+	if (snprintf(comm, sizeof(comm), "%s -w", iptables_cmd) >= sizeof(comm)) {
+		pr_err("Can't fit '%s -w' to buffer\n", iptables_cmd);
+		close_image(img);
+		return -1;
+	}
+
+	ret = run_iptables_tool(comm, img_raw_fd(img), -1);
 	close_image(img);
 	if (ret)
 		return ret;
@@ -2383,7 +2559,19 @@ ipt6:
 	if (empty_image(img))
 		goto out;
 
-	ret = run_iptables_tool("ip6tables-restore -w", img_raw_fd(img), -1);
+	if (!ip6tables_cmd) {
+		pr_err("Can't restore ip6tables dump - no legacy version present\n");
+		close_image(img);
+		return -1;
+	}
+
+	if (snprintf(comm, sizeof(comm), "%s -w", ip6tables_cmd) >= sizeof(comm)) {
+		pr_err("Can't fit '%s -w' to buffer\n", ip6tables_cmd);
+		close_image(img);
+		return -1;
+	}
+
+	ret = run_iptables_tool(comm, img_raw_fd(img), -1);
 out:
 	close_image(img);
 
@@ -2391,58 +2579,85 @@ out:
 }
 
 #if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1)
-static inline int restore_nftables(int pid)
+static inline int do_restore_nftables(struct cr_img *img)
 {
-	int ret = -1;
-	struct cr_img *img;
+	int exit_code = -1;
 	struct nft_ctx *nft;
 	off_t img_data_size;
 	char *buf;
 
+	if ((img_data_size = img_raw_size(img)) < 0) {
+		pr_err("image size mismatch\n");
+		goto out;
+	}
+
+	if (read_img_str(img, &buf, img_data_size) < 0) {
+		pr_err("Failed to read nftables data\n");
+		goto out;
+	}
+
+	nft = nft_ctx_new(NFT_CTX_DEFAULT);
+	if (!nft) {
+		pr_err("Failed to create nft context object\n");
+		goto buf_free_out;
+	}
+
+	if (nft_ctx_buffer_output(nft) || nft_ctx_buffer_error(nft)) {
+		pr_err("Failed to enable std/err output buffering\n");
+		goto nft_ctx_free_out;
+	}
+
+#if defined(CONFIG_HAS_NFTABLES_LIB_API_0)
+	if (nft_run_cmd_from_buffer(nft, buf, strlen(buf)))
+#elif defined(CONFIG_HAS_NFTABLES_LIB_API_1)
+	if (nft_run_cmd_from_buffer(nft, buf))
+#else
+	BUILD_BUG_ON(1);
+#endif
+	{
+		pr_err("nft command error:\n%s\n%s\n",
+		       nft_ctx_get_error_buffer(nft), buf);
+		goto nft_ctx_free_out;
+	}
+
+	exit_code = 0;
+
+nft_ctx_free_out:
+	nft_ctx_free(nft);
+buf_free_out:
+	xfree(buf);
+out:
+	return exit_code;
+}
+#endif
+
+static inline int restore_nftables(int pid)
+{
+	int exit_code = -1;
+	struct cr_img *img;
+
 	img = open_image(CR_FD_NFTABLES, O_RSTR, pid);
 	if (img == NULL)
 		return -1;
 	if (empty_image(img)) {
 		/* Backward compatibility */
 		pr_info("Skipping nft restore, no image\n");
-		ret = 0;
+		exit_code = 0;
 		goto image_close_out;
 	}
 
-	if ((img_data_size = img_raw_size(img)) < 0)
-		goto image_close_out;
-
-	if (read_img_str(img, &buf, img_data_size) < 0)
-		goto image_close_out;
-
-	nft = nft_ctx_new(NFT_CTX_DEFAULT);
-	if (!nft)
-		goto buf_free_out;
-
-	if (nft_ctx_buffer_output(nft) || nft_ctx_buffer_error(nft) ||
-#if defined(CONFIG_HAS_NFTABLES_LIB_API_0)
-	    nft_run_cmd_from_buffer(nft, buf, strlen(buf)))
-#elif defined(CONFIG_HAS_NFTABLES_LIB_API_1)
-	    nft_run_cmd_from_buffer(nft, buf))
+#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1)
+	if (!do_restore_nftables(img))
+		exit_code = 0;
 #else
-	{
-		BUILD_BUG_ON(1);
-	}
+	pr_err("Unable to restore nftables. CRIU was built without libnftables support\n");
 #endif
-		goto nft_ctx_free_out;
 
-	ret = 0;
-
-nft_ctx_free_out:
-	nft_ctx_free(nft);
-buf_free_out:
-	xfree(buf);
 image_close_out:
 	close_image(img);
 
-	return ret;
+	return exit_code;
 }
-#endif
 
 int read_net_ns_img(void)
 {
@@ -2519,6 +2734,12 @@ static int restore_netns_conf(struct ns_id *ns)
 			goto out;
 	}
 
+	if ((netns)->ipv4_sysctl) {
+		ret = ipv4_sysctls_op(&(netns)->ipv4_sysctl, &(netns)->n_ipv4_sysctl, CTL_WRITE);
+		if (ret)
+			goto out;
+	}
+
 	ns->net.netns = netns;
 out:
 	return ret;
@@ -2771,10 +2992,8 @@ static int prepare_net_ns_second_stage(struct ns_id *ns)
 			ret = restore_rule(nsid);
 		if (!ret)
 			ret = restore_iptables(nsid);
-#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1)
 		if (!ret)
 			ret = restore_nftables(nsid);
-#endif
 	}
 
 	if (!ret)
@@ -3000,11 +3219,45 @@ err:
 	return ret;
 }
 
-static inline int nftables_lock_network_internal(void)
+#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1)
+/* Route libnftables output and errors to a stream on CRIU's log fd; NULL on failure. */
+static inline FILE *redirect_nftables_output(struct nft_ctx *nft)
+{
+	FILE *fp;
+	int fd;
+
+	fd = dup(log_get_fd());
+	if (fd < 0) {
+		pr_perror("dup() to redirect nftables output failed");
+		return NULL;
+	}
+
+	fp = fdopen(fd, "w");
+	if (!fp) {
+		pr_perror("fdopen() to redirect nftables output failed");
+		/* fdopen() failed, so no stream took over fd: close it here. */
+		close(fd);
+		return NULL;
+	}
+
+	/*
+	 * Unbuffered, so that libnftables output shows up at the right
+	 * position in the log instead of somewhere near its end.
+	 */
+	setvbuf(fp, NULL, _IONBF, 0);
+	nft_ctx_set_output(nft, fp);
+	nft_ctx_set_error(nft, fp);
+
+	return fp;
+}
+#endif
+
+static inline int nftables_lock_network_internal(bool restore)
 {
 #if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1)
+	cleanup_file FILE *fp = NULL;
 	struct nft_ctx *nft;
-	int ret = 0;
+	int ret = 0, exit_code = -1;
 	char table[32];
 	char buf[128];
 
@@ -3015,10 +3268,19 @@ static inline int nftables_lock_network_internal(void)
 	if (!nft)
 		return -1;
 
-	snprintf(buf, sizeof(buf), "create table %s", table);
-	if (NFT_RUN_CMD(nft, buf))
+	fp = redirect_nftables_output(nft);
+	if (!fp)
 		goto err2;
 
+	snprintf(buf, sizeof(buf), "create table %s", table);
+	ret = NFT_RUN_CMD(nft, buf);
+	if (ret) {
+		/* The network has been locked on dump. */
+		if (restore && errno == EEXIST)
+			return 0;
+		goto err2;
+	}
+
 	snprintf(buf, sizeof(buf), "add chain %s output { type filter hook output priority 0; policy drop; }", table);
 	if (NFT_RUN_CMD(nft, buf))
 		goto err1;
@@ -3035,17 +3297,16 @@ static inline int nftables_lock_network_internal(void)
 	if (NFT_RUN_CMD(nft, buf))
 		goto err1;
 
-	goto out;
-
+	exit_code = 0;
+out:
+	nft_ctx_free(nft);
+	return exit_code;
 err1:
 	snprintf(buf, sizeof(buf), "delete table %s", table);
 	NFT_RUN_CMD(nft, buf);
 err2:
-	ret = -1;
 	pr_err("Locking network failed using nftables\n");
-out:
-	nft_ctx_free(nft);
-	return ret;
+	goto out;
 #else
 	pr_err("CRIU was built without libnftables support\n");
 	return -1;
@@ -3077,17 +3338,20 @@ static int iptables_network_lock_internal(void)
 	return ret;
 }
 
-int network_lock_internal(void)
+int network_lock_internal(bool restore)
 {
 	int ret = 0, nsret;
 
+	if (opts.network_lock_method == NETWORK_LOCK_SKIP)
+		return 0;
+
 	if (switch_ns(root_item->pid->real, &net_ns_desc, &nsret))
 		return -1;
 
 	if (opts.network_lock_method == NETWORK_LOCK_IPTABLES)
 		ret = iptables_network_lock_internal();
 	else if (opts.network_lock_method == NETWORK_LOCK_NFTABLES)
-		ret = nftables_lock_network_internal();
+		ret = nftables_lock_network_internal(restore);
 
 	if (restore_ns(nsret, &net_ns_desc))
 		ret = -1;
@@ -3099,6 +3363,7 @@ static inline int nftables_network_unlock(void)
 {
 #if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1)
 	int ret = 0;
+	cleanup_file FILE *fp = NULL;
 	struct nft_ctx *nft;
 	char table[32];
 	char buf[128];
@@ -3110,6 +3375,10 @@ static inline int nftables_network_unlock(void)
 	if (!nft)
 		return -1;
 
+	fp = redirect_nftables_output(nft);
+	if (!fp)
+		return -1;
+
 	snprintf(buf, sizeof(buf), "delete table %s", table);
 	if (NFT_RUN_CMD(nft, buf))
 		ret = -1;
@@ -3122,19 +3391,53 @@ static inline int nftables_network_unlock(void)
 #endif
 }
 
+static bool iptables_has_criu_jump_target(void)
+{
+	int fd, ret;
+	char *argv[4] = { "sh", "-c", "iptables -C INPUT -j CRIU", NULL }; /* only the INPUT chain of `iptables` is probed */
+
+	fd = open("/dev/null", O_RDWR); /* passed as all three fd arguments of cr_system() below */
+	if (fd < 0) {
+		fd = -1;
+		pr_perror("failed to open /dev/null, using log fd");
+	}
+
+	ret = cr_system(fd, fd, fd, "sh", argv, CRS_CAN_FAIL);
+	close_safe(&fd);
+	return !ret; /* true only when cr_system() returned 0 */
+}
+
 static int iptables_network_unlock_internal(void)
 {
-	char conf[] = "*filter\n"
-		      ":CRIU - [0:0]\n"
-		      "-D INPUT -j CRIU\n"
-		      "-D OUTPUT -j CRIU\n"
-		      "-X CRIU\n"
-		      "COMMIT\n";
+	char delete_jump_targets[] = "*filter\n"
+				     ":CRIU - [0:0]\n"
+				     "-D INPUT -j CRIU\n"
+				     "-D OUTPUT -j CRIU\n"
+				     "COMMIT\n";
+
+	char delete_criu_chain[] = "*filter\n"
+				   ":CRIU - [0:0]\n"
+				   "-X CRIU\n"
+				   "COMMIT\n";
+
 	int ret = 0;
 
-	ret |= iptables_restore(false, conf, sizeof(conf) - 1);
+	ret |= iptables_restore(false, delete_jump_targets, sizeof(delete_jump_targets) - 1);
 	if (kdat.ipv6)
-		ret |= iptables_restore(true, conf, sizeof(conf) - 1);
+		ret |= iptables_restore(true, delete_jump_targets, sizeof(delete_jump_targets) - 1);
+
+	/* For compatibility with iptables-nft backend, we need to make sure that all jump
+	 * targets have been removed before deleting the CRIU chain.
+	 */
+	if (iptables_has_criu_jump_target()) {
+		ret |= iptables_restore(false, delete_jump_targets, sizeof(delete_jump_targets) - 1);
+		if (kdat.ipv6)
+			ret |= iptables_restore(true, delete_jump_targets, sizeof(delete_jump_targets) - 1);
+	}
+
+	ret |= iptables_restore(false, delete_criu_chain, sizeof(delete_criu_chain) - 1);
+	if (kdat.ipv6)
+		ret |= iptables_restore(true, delete_criu_chain, sizeof(delete_criu_chain) - 1);
 
 	return ret;
 }
@@ -3143,6 +3446,9 @@ static int network_unlock_internal(void)
 {
 	int ret = 0, nsret;
 
+	if (opts.network_lock_method == NETWORK_LOCK_SKIP)
+		return 0;
+
 	if (switch_ns(root_item->pid->real, &net_ns_desc, &nsret))
 		return -1;
 
@@ -3171,7 +3477,7 @@ int network_lock(void)
 	if (run_scripts(ACT_NET_LOCK))
 		return -1;
 
-	return network_lock_internal();
+	return network_lock_internal(false);
 }
 
 void network_unlock(void)
@@ -3215,7 +3521,7 @@ int macvlan_ext_add(struct external *ext)
 /*
  * The setns() syscall (called by switch_ns()) can be extremely
  * slow. If we call it two or more times from the same task the
- * kernel will synchonously go on a very slow routine called
+ * kernel will synchronously go on a very slow routine called
  * synchronize_rcu() trying to put a reference on old namespaces.
  *
  * To avoid doing this more than once we pre-create all the
@@ -3366,7 +3672,7 @@ int collect_net_namespaces(bool for_dump)
 
 struct ns_desc net_ns_desc = NS_DESC_ENTRY(CLONE_NEWNET, "net");
 
-struct ns_id *net_get_root_ns()
+struct ns_id *net_get_root_ns(void)
 {
 	static struct ns_id *root_netns = NULL;
 
@@ -3383,7 +3689,7 @@ struct ns_id *net_get_root_ns()
 
 /*
  * socket_diag doesn't report unbound and unconnected sockets,
- * so we have to get their network namesapces explicitly
+ * so we have to get their network namespaces explicitly
  */
 struct ns_id *get_socket_ns(int lfd)
 {
@@ -3483,7 +3789,7 @@ static int move_to_bridge(struct external *ext, void *arg)
 			ret = -1;
 			goto out;
 		}
-		strlcpy(ifr.ifr_name, br, IFNAMSIZ);
+		__strlcpy(ifr.ifr_name, br, IFNAMSIZ);
 		ret = ioctl(s, SIOCBRADDIF, &ifr);
 		if (ret < 0) {
 			pr_perror("Can't add interface %s to bridge %s", out, br);
@@ -3495,7 +3801,7 @@ static int move_to_bridge(struct external *ext, void *arg)
 		 * $ ip link set dev <device> up
 		 */
 		ifr.ifr_ifindex = 0;
-		strlcpy(ifr.ifr_name, out, IFNAMSIZ);
+		__strlcpy(ifr.ifr_name, out, IFNAMSIZ);
 		ret = ioctl(s, SIOCGIFFLAGS, &ifr);
 		if (ret < 0) {
 			pr_perror("Can't get flags of interface %s", out);
diff --git a/criu/netfilter.c b/criu/netfilter.c
index 2212fd9f2..e2c82764f 100644
--- a/criu/netfilter.c
+++ b/criu/netfilter.c
@@ -48,8 +48,8 @@ void preload_netfilter_modules(void)
 		fd = -1;
 		pr_perror("failed to open /dev/null, using log fd for net module preload");
 	}
-	cr_system(fd, fd, fd, iptable_cmd_ipv4, (char *[]){ iptable_cmd_ipv4, "-L", "-n", NULL }, 0);
-	cr_system(fd, fd, fd, iptable_cmd_ipv6, (char *[]){ iptable_cmd_ipv6, "-L", "-n", NULL }, 0);
+	cr_system(fd, fd, fd, iptable_cmd_ipv4, (char *[]){ iptable_cmd_ipv4, "-L", "-n", NULL }, CRS_CAN_FAIL);
+	cr_system(fd, fd, fd, iptable_cmd_ipv6, (char *[]){ iptable_cmd_ipv6, "-L", "-n", NULL }, CRS_CAN_FAIL);
 	close_safe(&fd);
 }
 
@@ -299,7 +299,25 @@ int nftables_lock_connection(struct inet_sk_desc *sk)
 
 int nftables_get_table(char *table, int n)
 {
-	if (snprintf(table, n, "inet CRIU-%d", root_item->pid->real) < 0) {
+	int ret;
+
+	switch(dump_criu_run_id[0]) {
+	case 0:
+		/* This is not a restore.*/
+		ret = snprintf(table, n, "inet CRIU-%s", criu_run_id);
+		break;
+	case NO_DUMP_CRIU_RUN_ID:
+		/**
+		 * This is a restore from an older image with no
+		 * dump_criu_run_id available. Let's use the old ID.
+		 */
+		ret = snprintf(table, n, "inet CRIU-%d", root_item->pid->real);
+		break;
+	default:
+		ret = snprintf(table, n, "inet CRIU-%s", dump_criu_run_id);
+	}
+
+	if (ret < 0) {
 		pr_err("Cannot generate CRIU's nftables table name\n");
 		return -1;
 	}
diff --git a/criu/page-pipe.c b/criu/page-pipe.c
index 54dc3ccc4..4601d8f9c 100644
--- a/criu/page-pipe.c
+++ b/criu/page-pipe.c
@@ -99,6 +99,7 @@ static struct page_pipe_buf *ppb_alloc(struct page_pipe *pp, unsigned int ppb_fl
 {
 	struct page_pipe_buf *prev = pp_prev_ppb(pp, ppb_flags);
 	struct page_pipe_buf *ppb;
+	int ppb_size = 0;
 
 	ppb = xmalloc(sizeof(*ppb));
 	if (!ppb)
@@ -120,7 +121,13 @@ static struct page_pipe_buf *ppb_alloc(struct page_pipe *pp, unsigned int ppb_fl
 		cnt_add(CNT_PAGE_PIPES, 1);
 
 		ppb->pipe_off = 0;
-		ppb->pipe_size = fcntl(ppb->p[0], F_GETPIPE_SZ, 0) / PAGE_SIZE;
+		ppb_size = fcntl(ppb->p[0], F_GETPIPE_SZ, 0);
+		if (ppb_size < 0) {
+			xfree(ppb);
+			pr_perror("Can't get pipe size");
+			return NULL;
+		}
+		ppb->pipe_size = ppb_size / PAGE_SIZE;
 		pp->nr_pipes++;
 	}
 
@@ -374,7 +381,7 @@ int pipe_read_dest_init(struct pipe_read_dest *prd)
 	return 0;
 }
 
-int page_pipe_read(struct page_pipe *pp, struct pipe_read_dest *prd, unsigned long addr, unsigned int *nr_pages,
+int page_pipe_read(struct page_pipe *pp, struct pipe_read_dest *prd, unsigned long addr, unsigned long int *nr_pages,
 		   unsigned int ppb_flags)
 {
 	struct page_pipe_buf *ppb;
@@ -399,7 +406,7 @@ int page_pipe_read(struct page_pipe *pp, struct pipe_read_dest *prd, unsigned lo
 	}
 
 	/* clamp the request if it passes the end of iovec */
-	len = min((unsigned long)iov->iov_base + iov->iov_len - addr, (unsigned long)(*nr_pages) * PAGE_SIZE);
+	len = min((unsigned long)iov->iov_base + iov->iov_len - addr, *nr_pages * PAGE_SIZE);
 	*nr_pages = len / PAGE_SIZE;
 
 	skip += ppb->pipe_off * PAGE_SIZE;
@@ -439,17 +446,17 @@ void debug_show_page_pipe(struct page_pipe *pp)
 	pr_debug("Page pipe:\n");
 	pr_debug("* %u pipes %u/%u iovs:\n", pp->nr_pipes, pp->free_iov, pp->nr_iovs);
 	list_for_each_entry(ppb, &pp->bufs, l) {
-		pr_debug("\tbuf %u pages, %u iovs, flags: %x pipe_off: %x :\n", ppb->pages_in, ppb->nr_segs, ppb->flags,
+		pr_debug("\tbuf %lx pages, %u iovs, flags: %x pipe_off: %lx :\n", ppb->pages_in, ppb->nr_segs, ppb->flags,
 			 ppb->pipe_off);
 		for (i = 0; i < ppb->nr_segs; i++) {
 			iov = &ppb->iov[i];
-			pr_debug("\t\t%p %lu\n", iov->iov_base, iov->iov_len / PAGE_SIZE);
+			pr_debug("\t\t%p - %p\n", iov->iov_base, iov->iov_base + iov->iov_len);
 		}
 	}
 
 	pr_debug("* %u holes:\n", pp->free_hole);
 	for (i = 0; i < pp->free_hole; i++) {
 		iov = &pp->holes[i];
-		pr_debug("\t%p %lu\n", iov->iov_base, iov->iov_len / PAGE_SIZE);
+		pr_debug("\t%p - %p\n", iov->iov_base, iov->iov_base + iov->iov_len);
 	}
 }
diff --git a/criu/page-xfer.c b/criu/page-xfer.c
index 782d4cafc..463d4c506 100644
--- a/criu/page-xfer.c
+++ b/criu/page-xfer.c
@@ -2,6 +2,7 @@
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <linux/falloc.h>
+#include <netinet/tcp.h>
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/wait.h>
@@ -31,7 +32,7 @@ static int page_server_sk = -1;
 
 struct page_server_iov {
 	u32 cmd;
-	u32 nr_pages;
+	u64 nr_pages;
 	u64 vaddr;
 	u64 dst_id;
 };
@@ -157,18 +158,32 @@ static inline int send_psi(int sk, struct page_server_iov *pi)
 	return send_psi_flags(sk, pi, 0);
 }
 
+static void tcp_cork(int sk, bool on)
+{
+	int val = on ? 1 : 0;
+	if (setsockopt(sk, SOL_TCP, TCP_CORK, &val, sizeof(val)))
+		pr_pwarn("Unable to set TCP_CORK=%d", val); /* best effort: a failure is only logged, never returned */
+}
+
+static void tcp_nodelay(int sk, bool on)
+{
+	int val = on ? 1 : 0;
+	if (setsockopt(sk, SOL_TCP, TCP_NODELAY, &val, sizeof(val)))
+		pr_pwarn("Unable to set TCP_NODELAY=%d", val); /* best effort: a failure is only logged, never returned */
+}
+
 /* page-server xfer */
 static int write_pages_to_server(struct page_xfer *xfer, int p, unsigned long len)
 {
 	ssize_t ret, left = len;
 
 	if (opts.tls) {
-		pr_debug("Sending %lu bytes / %lu pages\n", len, len / PAGE_SIZE);
+		pr_debug("Sending %lx bytes\n", len);
 
 		if (tls_send_data_from_fd(p, len))
 			return -1;
 	} else {
-		pr_debug("Splicing %lu bytes / %lu pages into socket\n", len, len / PAGE_SIZE);
+		pr_debug("Splicing %lx bytes into socket\n", len);
 
 		while (left > 0) {
 			ret = splice(p, NULL, xfer->sk, NULL, left, SPLICE_F_MOVE);
@@ -177,7 +192,7 @@ static int write_pages_to_server(struct page_xfer *xfer, int p, unsigned long le
 				return -1;
 			}
 
-			pr_debug("\tSpliced: %lu bytes sent\n", (unsigned long)ret);
+			pr_debug("\tSpliced: %lx bytes sent\n", (unsigned long)ret);
 			left -= ret;
 		}
 	}
@@ -273,7 +288,7 @@ static int check_pagehole_in_parent(struct page_read *p, struct iovec *iov)
 	 * read_pagemap_page routine.
 	 */
 
-	pr_debug("Checking %p/%zu hole\n", iov->iov_base, iov->iov_len);
+	pr_debug("Checking %p - %p hole\n", iov->iov_base, iov->iov_base + iov->iov_len);
 	off = (unsigned long)iov->iov_base;
 	end = off + iov->iov_len;
 	while (1) {
@@ -285,7 +300,8 @@ static int check_pagehole_in_parent(struct page_read *p, struct iovec *iov)
 			return -1;
 		}
 
-		pr_debug("\tFound %" PRIx64 "/%lu\n", p->pe->vaddr, pagemap_len(p->pe));
+		pr_debug("\tFound %" PRIx64 " - %" PRIx64 "\n",
+			 p->pe->vaddr, p->pe->vaddr + pagemap_len(p->pe));
 
 		/*
 		 * The pagemap entry in parent may happen to be
@@ -311,6 +327,7 @@ static int write_pagemap_loc(struct page_xfer *xfer, struct iovec *iov, u32 flag
 	pe.nr_pages = iov->iov_len / PAGE_SIZE;
 	pe.has_flags = true;
 	pe.flags = flags;
+	pe.has_nr_pages = true;
 
 	if (flags & PE_PRESENT) {
 		if (opts.auto_dedup && xfer->parent != NULL) {
@@ -324,7 +341,8 @@ static int write_pagemap_loc(struct page_xfer *xfer, struct iovec *iov, u32 flag
 		if (xfer->parent != NULL) {
 			ret = check_pagehole_in_parent(xfer->parent, iov);
 			if (ret) {
-				pr_err("Hole %p/%zu not found in parent\n", iov->iov_base, iov->iov_len);
+				pr_err("Hole %p - %p not found in parent\n",
+				       iov->iov_base, iov->iov_base + iov->iov_len);
 				return -1;
 			}
 		}
@@ -834,7 +852,7 @@ int page_xfer_predump_pages(int pid, struct page_xfer *xfer, struct page_pipe *p
 
 			BUG_ON(iov.iov_base < (void *)xfer->offset);
 			iov.iov_base -= xfer->offset;
-			pr_debug("\t p %p [%u]\n", iov.iov_base, (unsigned int)(iov.iov_len / PAGE_SIZE));
+			pr_debug("\t p %p - %p\n", iov.iov_base, iov.iov_base + iov.iov_len);
 
 			flags = ppb_xfer_flags(xfer, ppb);
 
@@ -870,7 +888,7 @@ int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp)
 	list_for_each_entry(ppb, &pp->bufs, l) {
 		unsigned int i;
 
-		pr_debug("\tbuf %d/%d\n", ppb->pages_in, ppb->nr_segs);
+		pr_debug("\tbuf %lx/%d\n", ppb->pages_in, ppb->nr_segs);
 
 		for (i = 0; i < ppb->nr_segs; i++) {
 			struct iovec iov = ppb->iov[i];
@@ -882,7 +900,7 @@ int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp)
 
 			BUG_ON(iov.iov_base < (void *)xfer->offset);
 			iov.iov_base -= xfer->offset;
-			pr_debug("\tp %p [%u]\n", iov.iov_base, (unsigned int)(iov.iov_len / PAGE_SIZE));
+			pr_debug("\tp %p - %p\n", iov.iov_base, iov.iov_base + iov.iov_len);
 
 			flags = ppb_xfer_flags(xfer, ppb);
 
@@ -1055,7 +1073,8 @@ static int page_server_add(int sk, struct page_server_iov *pi, u32 flags)
 	struct page_xfer *lxfer = &cxfer.loc_xfer;
 	struct iovec iov;
 
-	pr_debug("Adding %" PRIx64 "/%u\n", pi->vaddr, pi->nr_pages);
+	pr_debug("Adding %" PRIx64 " - %" PRIx64 "\n",
+		 pi->vaddr, pi->vaddr + pi->nr_pages * PAGE_SIZE);
 
 	if (prep_loc_xfer(pi))
 		return -1;
@@ -1120,13 +1139,17 @@ static int page_server_get_pages(int sk, struct page_server_iov *pi)
 {
 	struct pstree_item *item;
 	struct page_pipe *pp;
-	unsigned long len;
+	unsigned long len, nr_pages;
 	int ret;
 
 	item = pstree_item_by_virt(pi->dst_id);
 	pp = dmpi(item)->mem_pp;
 
-	ret = page_pipe_read(pp, &pipe_read_dest, pi->vaddr, &pi->nr_pages, PPB_LAZY);
+	/* page_pipe_read() uses 'unsigned long *' but pi->nr_pages is u64.
+	 * Use a temporary variable to fix the incompatible pointer type
+	 * on 32-bit platforms (e.g. armv7). */
+	nr_pages = pi->nr_pages;
+	ret = page_pipe_read(pp, &pipe_read_dest, pi->vaddr, &nr_pages, PPB_LAZY);
 	if (ret)
 		return ret;
 
@@ -1135,6 +1158,7 @@ static int page_server_get_pages(int sk, struct page_server_iov *pi)
 	 * .dst_id all remain intact.
 	 */
 
+	pi->nr_pages = nr_pages;
 	if (pi->nr_pages == 0) {
 		pr_debug("no iovs found, zero pages\n");
 		return -1;
@@ -1332,7 +1356,7 @@ static int fill_page_pipe(struct page_read *pr, struct page_pipe *pp)
 static int page_pipe_from_pagemap(struct page_pipe **pp, int pid)
 {
 	struct page_read pr;
-	int nr_pages = 0;
+	unsigned long nr_pages = 0;
 
 	if (open_page_read(pid, &pr, PR_TASK) <= 0) {
 		pr_err("Failed to open page read for %d\n", pid);
@@ -1406,7 +1430,7 @@ int cr_page_server(bool daemon_mode, bool lazy_dump, int cfd)
 
 	if (opts.ps_socket != -1) {
 		ask = opts.ps_socket;
-		pr_info("Re-using ps socket %d\n", ask);
+		pr_info("Reusing ps socket %d\n", ask);
 		goto no_server;
 	}
 
@@ -1452,7 +1476,7 @@ static int connect_to_page_server(void)
 
 	if (opts.ps_socket != -1) {
 		page_server_sk = opts.ps_socket;
-		pr_info("Re-using ps socket %d\n", page_server_sk);
+		pr_info("Reusing ps socket %d\n", page_server_sk);
 		goto out;
 	}
 
@@ -1535,13 +1559,13 @@ struct ps_async_read {
 
 static LIST_HEAD(async_reads);
 
-static inline void async_read_set_goal(struct ps_async_read *ar, int nr_pages)
+static inline void async_read_set_goal(struct ps_async_read *ar, unsigned long nr_pages)
 {
 	ar->goal = sizeof(ar->pi) + nr_pages * PAGE_SIZE;
 	ar->nr_pages = nr_pages;
 }
 
-static void init_ps_async_read(struct ps_async_read *ar, void *buf, int nr_pages, ps_async_read_complete complete,
+static void init_ps_async_read(struct ps_async_read *ar, void *buf, unsigned long nr_pages, ps_async_read_complete complete,
 			       void *priv)
 {
 	ar->pages = buf;
@@ -1551,7 +1575,7 @@ static void init_ps_async_read(struct ps_async_read *ar, void *buf, int nr_pages
 	async_read_set_goal(ar, nr_pages);
 }
 
-static int page_server_start_async_read(void *buf, int nr_pages, ps_async_read_complete complete, void *priv)
+static int page_server_start_async_read(void *buf, unsigned long nr_pages, ps_async_read_complete complete, void *priv)
 {
 	struct ps_async_read *ar;
 
@@ -1651,7 +1675,7 @@ int connect_to_page_server_to_recv(int epfd)
 	return epoll_add_rfd(epfd, &ps_rfd);
 }
 
-int request_remote_pages(unsigned long img_id, unsigned long addr, int nr_pages)
+int request_remote_pages(unsigned long img_id, unsigned long addr, unsigned long nr_pages)
 {
 	struct page_server_iov pi = {
 		.cmd = PS_IOV_GET,
@@ -1668,7 +1692,7 @@ int request_remote_pages(unsigned long img_id, unsigned long addr, int nr_pages)
 	return 0;
 }
 
-static int page_server_start_sync_read(void *buf, int nr, ps_async_read_complete complete, void *priv)
+static int page_server_start_sync_read(void *buf, unsigned long nr, ps_async_read_complete complete, void *priv)
 {
 	struct ps_async_read ar;
 	int ret = 1;
@@ -1679,7 +1703,7 @@ static int page_server_start_sync_read(void *buf, int nr, ps_async_read_complete
 	return ret;
 }
 
-int page_server_start_read(void *buf, int nr, ps_async_read_complete complete, void *priv, unsigned flags)
+int page_server_start_read(void *buf, unsigned long nr, ps_async_read_complete complete, void *priv, unsigned flags)
 {
 	if (flags & PR_ASYNC)
 		return page_server_start_async_read(buf, nr, complete, priv);
diff --git a/criu/pagemap-cache.c b/criu/pagemap-cache.c
index 00f088ff3..457c0d649 100644
--- a/criu/pagemap-cache.c
+++ b/criu/pagemap-cache.c
@@ -1,5 +1,6 @@
 #include <unistd.h>
 #include <fcntl.h>
+#include <sys/ioctl.h>
 
 #include "page.h"
 #include "pagemap-cache.h"
@@ -10,6 +11,7 @@
 #include "vma.h"
 #include "mem.h"
 #include "kerndat.h"
+#include "fault-injection.h"
 
 #undef LOG_PREFIX
 #define LOG_PREFIX "pagemap-cache: "
@@ -22,6 +24,8 @@
 
 #define PAGEMAP_LEN(addr) (PAGE_PFN(addr) * sizeof(u64))
 
+#define PAGE_REGIONS_MAX_NR 32768
+
 /*
  * It's a workaround for a kernel bug. In the 3.19 kernel when pagemap are read
  * for a few vma-s for one read call, it returns incorrect data.
@@ -50,10 +54,23 @@ int pmc_init(pmc_t *pmc, pid_t pid, const struct list_head *vma_head, size_t siz
 	pmc->pid = pid;
 	pmc->map_len = PAGEMAP_LEN(map_size);
 	pmc->vma_head = vma_head;
+	pmc->regs_max_len = PAGE_PFN(map_size);
+	if (pmc->regs_max_len > PAGE_REGIONS_MAX_NR)
+		pmc->regs_max_len = PAGE_REGIONS_MAX_NR;
+	pmc->regs_len = 0;
+	pmc->regs_idx = 0;
+	pmc->regs = NULL;
+	pmc->map = NULL;
 
-	pmc->map = xmalloc(pmc->map_len);
-	if (!pmc->map)
-		goto err;
+	if (kdat.has_pagemap_scan && !fault_injected(FI_DONT_USE_PAGEMAP_SCAN)) {
+		pmc->regs = xmalloc(pmc->regs_max_len * sizeof(struct page_region));
+		if (!pmc->regs)
+			goto err;
+	} else {
+		pmc->map = xmalloc(pmc->map_len);
+		if (!pmc->map)
+			goto err;
+	}
 
 	if (pagemap_cache_disabled)
 		pr_warn_once("The pagemap cache is disabled\n");
@@ -87,17 +104,11 @@ err:
 	return -1;
 }
 
-static inline u64 *__pmc_get_map(pmc_t *pmc, unsigned long addr)
-{
-	return &pmc->map[PAGE_PFN(addr - pmc->start)];
-}
-
 static int pmc_fill_cache(pmc_t *pmc, const struct vma_area *vma)
 {
 	unsigned long low = vma->e->start & PMC_MASK;
 	unsigned long high = low + PMC_SIZE;
 	size_t len = vma_area_len(vma);
-	size_t size_map;
 
 	if (high > kdat.task_size)
 		high = kdat.task_size;
@@ -115,7 +126,7 @@ static int pmc_fill_cache(pmc_t *pmc, const struct vma_area *vma)
 	 * fit in solid manner, iow -- either the whole vma fits
 	 * the cache window, either plain read is used.
 	 *
-	 * The benefit (apart redusing the number of read() calls)
+	 * The benefit (apart reducing the number of read() calls)
 	 * is to walk page tables less.
 	 */
 	if (!pagemap_cache_disabled && len < PMC_SIZE && (vma->e->start - low) < PMC_SIZE_GAP) {
@@ -149,39 +160,89 @@ static int pmc_fill_cache(pmc_t *pmc, const struct vma_area *vma)
 			pr_debug("\t%d: simple mode [l:%lx h:%lx]\n", pmc->pid, pmc->start, pmc->end);
 	}
 
+	return pmc_fill(pmc, pmc->start, pmc->end);
+}
+
+int pmc_fill(pmc_t *pmc, u64 start, u64 end)
+{
+	size_t size_map, off;
+
+	pmc->start = start;
+	pmc->end = end;
+
 	size_map = PAGEMAP_LEN(pmc->end - pmc->start);
 	BUG_ON(pmc->map_len < size_map);
 	BUG_ON(pmc->fd < 0);
 
-	if (pread(pmc->fd, pmc->map, size_map, PAGEMAP_PFN_OFF(pmc->start)) != size_map) {
-		pmc_zap(pmc);
-		pr_perror("Can't read %d's pagemap file", pmc->pid);
-		return -1;
+	if (pmc->regs) { /* regs is only allocated when PAGEMAP_SCAN is in use */
+		struct pm_scan_arg args = {
+			.size = sizeof(struct pm_scan_arg),
+			.flags = 0,
+			.start = pmc->start,
+			.end = pmc->end,
+			.vec = (long)pmc->regs,
+			.vec_len = pmc->regs_max_len,
+			.max_pages = 0,
+			/*
+			 * Request pages that are in RAM or swap, excluding
+			 * zero-filled and file-backed pages.
+			 */
+			.category_inverted = PAGE_IS_PFNZERO | PAGE_IS_FILE,
+			.category_mask = PAGE_IS_PFNZERO | PAGE_IS_FILE,
+			.category_anyof_mask = PAGE_IS_PRESENT | PAGE_IS_SWAPPED,
+			.return_mask = PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_SOFT_DIRTY,
+		};
+		long ret;
+
+		if (kdat.has_pagemap_scan_guard_pages)
+			args.return_mask |= PAGE_IS_GUARD;
+
+		ret = ioctl(pmc->fd, PAGEMAP_SCAN, &args);
+		if (ret == -1) {
+			pr_perror("PAGEMAP_SCAN");
+			pmc_zap(pmc);
+			return -1;
+		}
+		pmc->regs_len = ret; /* ioctl return value is stored as the number of filled regions */
+		pmc->regs_idx = 0;
+		pmc->end = args.walk_end; /* shrink the cached window to what the scan reported as walked */
+	} else {
+		for (off = 0; off != size_map;) { /* pread() may return short: keep reading until the window is full */
+			ssize_t ret;
+			char *ptr = (char *)pmc->map;
+
+			ret = pread(pmc->fd, ptr + off, size_map - off, PAGEMAP_PFN_OFF(pmc->start) + off);
+			if (ret <= 0) { /* 0 is EOF: fail instead of retrying the same offset forever */
+				pmc_zap(pmc);
+				pr_perror("Can't read %d's pagemap file", pmc->pid);
+				return -1;
+			}
+			off += ret;
+		}
 	}
 
 	return 0;
 }
 
-u64 *pmc_get_map(pmc_t *pmc, const struct vma_area *vma)
+int pmc_get_map(pmc_t *pmc, const struct vma_area *vma)
 {
 	/* Hit */
 	if (likely(pmc->start <= vma->e->start && pmc->end >= vma->e->end))
-		return __pmc_get_map(pmc, vma->e->start);
+		return 0;
 
 	/* Miss, refill the cache */
 	if (pmc_fill_cache(pmc, vma)) {
 		pr_err("Failed to fill cache for %d (%lx-%lx)\n", pmc->pid, (long)vma->e->start, (long)vma->e->end);
-		return NULL;
+		return -1;
 	}
-
-	/* Hit for sure */
-	return __pmc_get_map(pmc, vma->e->start);
+	return 0;
 }
 
 void pmc_fini(pmc_t *pmc)
 {
 	close_safe(&pmc->fd);
 	xfree(pmc->map);
+	xfree(pmc->regs);
 	pmc_reset(pmc);
 }
 
diff --git a/criu/pagemap.c b/criu/pagemap.c
index 83f69bba3..6c9c4f7fe 100644
--- a/criu/pagemap.c
+++ b/criu/pagemap.c
@@ -168,15 +168,15 @@ static int seek_pagemap(struct page_read *pr, unsigned long vaddr)
 	return 0;
 }
 
-static inline void pagemap_bound_check(PagemapEntry *pe, unsigned long vaddr, int nr)
+static inline void pagemap_bound_check(PagemapEntry *pe, unsigned long vaddr, unsigned long int nr)
 {
 	if (vaddr < pe->vaddr || (vaddr - pe->vaddr) / PAGE_SIZE + nr > pe->nr_pages) {
-		pr_err("Page read err %" PRIx64 ":%u vs %lx:%u\n", pe->vaddr, pe->nr_pages, vaddr, nr);
+		pr_err("Page read err %" PRIx64 ":%" PRIx64 " vs %lx:%lx\n", pe->vaddr, pe->nr_pages, vaddr, nr);
 		BUG();
 	}
 }
 
-static int read_parent_page(struct page_read *pr, unsigned long vaddr, int nr, void *buf, unsigned flags)
+static int read_parent_page(struct page_read *pr, unsigned long vaddr, unsigned long int nr, void *buf, unsigned flags)
 {
 	struct page_read *ppr = pr->parent;
 	int ret;
@@ -195,7 +195,7 @@ static int read_parent_page(struct page_read *pr, unsigned long vaddr, int nr, v
 	 */
 
 	do {
-		int p_nr;
+		unsigned long int p_nr;
 
 		pr_debug("\tpr%lu-%u Read from parent\n", pr->img_id, pr->id);
 		ret = ppr->seek_pagemap(ppr, vaddr);
@@ -210,7 +210,7 @@ static int read_parent_page(struct page_read *pr, unsigned long vaddr, int nr, v
 		 * read as much as we can.
 		 */
 		p_nr = ppr->pe->nr_pages - (vaddr - ppr->pe->vaddr) / PAGE_SIZE;
-		pr_info("\tparent has %u pages in\n", p_nr);
+		pr_info("\tparent has %lu pages in\n", p_nr);
 		if (p_nr > nr)
 			p_nr = nr;
 
@@ -261,7 +261,7 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr, unsigned l
 			break;
 	}
 
-	if (opts.auto_dedup) {
+	if (opts.auto_dedup && !pr->disable_dedup) {
 		ret = punch_hole(pr, pr->pi_off, len, false);
 		if (ret == -1)
 			return -1;
@@ -374,7 +374,7 @@ int pagemap_enqueue_iovec(struct page_read *pr, void *buf, unsigned long len, st
 	return 0;
 }
 
-static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr, int nr, void *buf, unsigned flags)
+static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr, unsigned long nr, void *buf, unsigned flags)
 {
 	int ret;
 	unsigned long len = nr * PAGE_SIZE;
@@ -402,7 +402,7 @@ static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr, int
  * We cannot use maybe_read_page_local() for streaming images as it uses
  * pread(), seeking in the file. Instead, we use this custom page reader.
  */
-static int maybe_read_page_img_streamer(struct page_read *pr, unsigned long vaddr, int nr, void *buf, unsigned flags)
+static int maybe_read_page_img_streamer(struct page_read *pr, unsigned long vaddr, unsigned long nr, void *buf, unsigned flags)
 {
 	unsigned long len = nr * PAGE_SIZE;
 	int fd;
@@ -445,7 +445,7 @@ static int maybe_read_page_img_streamer(struct page_read *pr, unsigned long vadd
 	return ret;
 }
 
-static int read_page_complete(unsigned long img_id, unsigned long vaddr, int nr_pages, void *priv)
+static int read_page_complete(unsigned long img_id, unsigned long vaddr, unsigned long int nr_pages, void *priv)
 {
 	int ret = 0;
 	struct page_read *pr = priv;
@@ -463,7 +463,7 @@ static int read_page_complete(unsigned long img_id, unsigned long vaddr, int nr_
 	return ret;
 }
 
-static int maybe_read_page_remote(struct page_read *pr, unsigned long vaddr, int nr, void *buf, unsigned flags)
+static int maybe_read_page_remote(struct page_read *pr, unsigned long vaddr, unsigned long nr, void *buf, unsigned flags)
 {
 	int ret;
 
@@ -474,9 +474,9 @@ static int maybe_read_page_remote(struct page_read *pr, unsigned long vaddr, int
 	return ret;
 }
 
-static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr, void *buf, unsigned flags)
+static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, unsigned long nr, void *buf, unsigned flags)
 {
-	pr_info("pr%lu-%u Read %lx %u pages\n", pr->img_id, pr->id, vaddr, nr);
+	pr_info("pr%lu-%u Read %lx %lu pages\n", pr->img_id, pr->id, vaddr, nr);
 	pagemap_bound_check(pr->pe, vaddr, nr);
 
 	if (pagemap_in_parent(pr->pe)) {
@@ -682,6 +682,9 @@ static void init_compat_pagemap_entry(PagemapEntry *pe)
 		pe->flags |= PE_PARENT;
 	else if (!pe->has_flags)
 		pe->flags = PE_PRESENT;
+
+	if (!pe->has_nr_pages)
+		pe->nr_pages = pe->compat_nr_pages;
 }
 
 /*
@@ -792,6 +795,7 @@ int open_page_read_at(int dfd, unsigned long img_id, struct page_read *pr, int p
 	pr->bunch.iov_base = NULL;
 	pr->pmes = NULL;
 	pr->pieok = false;
+	pr->disable_dedup = false;
 
 	pr->pmi = open_image_at(dfd, i_typ, O_RSTR, img_id);
 	if (!pr->pmi)
@@ -852,6 +856,14 @@ int open_page_read(unsigned long img_id, struct page_read *pr, int pr_flags)
 
 #define DUP_IDS_BASE 1000
 
+void page_read_disable_dedup(struct page_read *pr)
+{
+	pr_debug("disable dedup, id: %u\n", pr->id); /* %u: matches how pr->id is printed elsewhere in this file */
+	pr->disable_dedup = true;
+	if (pr->parent) /* propagate the flag through the whole chain of parent page_reads */
+		page_read_disable_dedup(pr->parent);
+}
+
 void dup_page_read(struct page_read *src, struct page_read *dst)
 {
 	static int dup_ids = 1;
diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c
index ee4fa86f4..e19847b37 100644
--- a/criu/parasite-syscall.c
+++ b/criu/parasite-syscall.c
@@ -9,7 +9,6 @@
 #include "common/compiler.h"
 #include "types.h"
 #include "protobuf.h"
-#include "images/sa.pb-c.h"
 #include "images/timer.pb-c.h"
 #include "images/creds.pb-c.h"
 #include "images/core.pb-c.h"
@@ -104,17 +103,24 @@ static int alloc_groups_copy_creds(CredsEntry *ce, struct parasite_dump_creds *c
 	BUILD_BUG_ON(sizeof(ce->cap_prm[0]) != sizeof(c->cap_prm[0]));
 	BUILD_BUG_ON(sizeof(ce->cap_eff[0]) != sizeof(c->cap_eff[0]));
 	BUILD_BUG_ON(sizeof(ce->cap_bnd[0]) != sizeof(c->cap_bnd[0]));
+	BUILD_BUG_ON(sizeof(ce->cap_amb[0]) != sizeof(c->cap_amb[0]));
 
 	BUG_ON(ce->n_cap_inh != CR_CAP_SIZE);
 	BUG_ON(ce->n_cap_prm != CR_CAP_SIZE);
 	BUG_ON(ce->n_cap_eff != CR_CAP_SIZE);
 	BUG_ON(ce->n_cap_bnd != CR_CAP_SIZE);
+	BUG_ON(ce->n_cap_amb != CR_CAP_SIZE);
 
 	memcpy(ce->cap_inh, c->cap_inh, sizeof(c->cap_inh[0]) * CR_CAP_SIZE);
 	memcpy(ce->cap_prm, c->cap_prm, sizeof(c->cap_prm[0]) * CR_CAP_SIZE);
 	memcpy(ce->cap_eff, c->cap_eff, sizeof(c->cap_eff[0]) * CR_CAP_SIZE);
 	memcpy(ce->cap_bnd, c->cap_bnd, sizeof(c->cap_bnd[0]) * CR_CAP_SIZE);
+	memcpy(ce->cap_amb, c->cap_amb, sizeof(c->cap_amb[0]) * CR_CAP_SIZE);
 
+	if (c->no_new_privs > 0) {
+		ce->no_new_privs = c->no_new_privs;
+		ce->has_no_new_privs = true;
+	}
 	ce->secbits = c->secbits;
 	ce->n_groups = c->ngroups;
 
@@ -195,13 +201,13 @@ int parasite_dump_thread_seized(struct parasite_thread_ctl *tctl, struct parasit
 	ret = compel_get_thread_regs(tctl, save_task_regs, core);
 	if (ret) {
 		pr_err("Can't obtain regs for thread %d\n", pid);
-		goto err_rth;
+		return -1;
 	}
 
 	ret = compel_arch_fetch_thread_area(tctl);
 	if (ret) {
 		pr_err("Can't obtain thread area of %d\n", pid);
-		goto err_rth;
+		return -1;
 	}
 
 	compel_arch_get_tls_thread(tctl, &args->tls);
@@ -211,223 +217,17 @@ int parasite_dump_thread_seized(struct parasite_thread_ctl *tctl, struct parasit
 	ret = compel_run_in_thread(tctl, PARASITE_CMD_DUMP_THREAD);
 	if (ret) {
 		pr_err("Can't init thread in parasite %d\n", pid);
-		goto err_rth;
+		return -1;
 	}
 
 	ret = alloc_groups_copy_creds(creds, pc);
 	if (ret) {
 		pr_err("Can't copy creds for thread %d\n", pid);
-		goto err_rth;
+		return -1;
 	}
 
-	compel_release_thread(tctl);
-
 	tid->ns[0].virt = args->tid;
 	return dump_thread_core(pid, core, args);
-
-err_rth:
-	compel_release_thread(tctl);
-	return -1;
-}
-
-int parasite_dump_sigacts_seized(struct parasite_ctl *ctl, struct pstree_item *item)
-{
-	TaskCoreEntry *tc = item->core[0]->tc;
-	struct parasite_dump_sa_args *args;
-	int ret, sig;
-	SaEntry *sa, **psa;
-
-	args = compel_parasite_args(ctl, struct parasite_dump_sa_args);
-
-	ret = compel_rpc_call_sync(PARASITE_CMD_DUMP_SIGACTS, ctl);
-	if (ret < 0)
-		return ret;
-
-	psa = xmalloc((SIGMAX - 2) * (sizeof(SaEntry *) + sizeof(SaEntry)));
-	if (!psa)
-		return -1;
-
-	sa = (SaEntry *)(psa + SIGMAX - 2);
-
-	tc->n_sigactions = SIGMAX - 2;
-	tc->sigactions = psa;
-
-	for (sig = 1; sig <= SIGMAX; sig++) {
-		int i = sig - 1;
-
-		if (sig == SIGSTOP || sig == SIGKILL)
-			continue;
-
-		sa_entry__init(sa);
-		ASSIGN_TYPED(sa->sigaction, encode_pointer(args->sas[i].rt_sa_handler));
-		ASSIGN_TYPED(sa->flags, args->sas[i].rt_sa_flags);
-		ASSIGN_TYPED(sa->restorer, encode_pointer(args->sas[i].rt_sa_restorer));
-#ifdef CONFIG_MIPS
-		sa->has_mask_extended = 1;
-		BUILD_BUG_ON(sizeof(sa->mask) * 2 != sizeof(args->sas[0].rt_sa_mask.sig));
-		memcpy(&sa->mask, &(args->sas[i].rt_sa_mask.sig[0]), sizeof(sa->mask));
-		memcpy(&sa->mask_extended, &(args->sas[i].rt_sa_mask.sig[1]), sizeof(sa->mask));
-#else
-		BUILD_BUG_ON(sizeof(sa->mask) != sizeof(args->sas[0].rt_sa_mask.sig));
-		memcpy(&sa->mask, args->sas[i].rt_sa_mask.sig, sizeof(sa->mask));
-#endif
-		sa->has_compat_sigaction = true;
-		sa->compat_sigaction = !compel_mode_native(ctl);
-
-		*(psa++) = sa++;
-	}
-
-	return 0;
-}
-
-static void encode_itimer(struct itimerval *v, ItimerEntry *ie)
-{
-	ie->isec = v->it_interval.tv_sec;
-	ie->iusec = v->it_interval.tv_usec;
-	ie->vsec = v->it_value.tv_sec;
-	ie->vusec = v->it_value.tv_usec;
-}
-
-int parasite_dump_itimers_seized(struct parasite_ctl *ctl, struct pstree_item *item)
-{
-	CoreEntry *core = item->core[0];
-	struct parasite_dump_itimers_args *args;
-	int ret;
-
-	args = compel_parasite_args(ctl, struct parasite_dump_itimers_args);
-
-	ret = compel_rpc_call_sync(PARASITE_CMD_DUMP_ITIMERS, ctl);
-	if (ret < 0)
-		return ret;
-
-	encode_itimer((&args->real), (core->tc->timers->real));
-	encode_itimer((&args->virt), (core->tc->timers->virt));
-	encode_itimer((&args->prof), (core->tc->timers->prof));
-
-	return 0;
-}
-
-static int core_alloc_posix_timers(TaskTimersEntry *tte, int n, PosixTimerEntry **pte)
-{
-	int sz;
-
-	/*
-	 * Will be free()-ed in core_entry_free()
-	 */
-
-	sz = n * (sizeof(PosixTimerEntry *) + sizeof(PosixTimerEntry));
-	tte->posix = xmalloc(sz);
-	if (!tte->posix)
-		return -1;
-
-	tte->n_posix = n;
-	*pte = (PosixTimerEntry *)(tte->posix + n);
-	return 0;
-}
-
-static int encode_notify_thread_id(pid_t rtid, struct pstree_item *item, PosixTimerEntry *pte)
-{
-	pid_t vtid = 0;
-	int i;
-
-	if (rtid == 0)
-		return 0;
-
-	if (!(root_ns_mask & CLONE_NEWPID)) {
-		/* Non-pid-namespace case */
-		pte->notify_thread_id = rtid;
-		pte->has_notify_thread_id = true;
-		return 0;
-	}
-
-	/* Pid-namespace case */
-	if (!kdat.has_nspid) {
-		pr_err("Have no NSpid support to dump notify thread id in pid namespace\n");
-		return -1;
-	}
-
-	for (i = 0; i < item->nr_threads; i++) {
-		if (item->threads[i].real != rtid)
-			continue;
-
-		vtid = item->threads[i].ns[0].virt;
-		break;
-	}
-
-	if (vtid == 0) {
-		pr_err("Unable to convert the notify thread id %d\n", rtid);
-		return -1;
-	}
-
-	pte->notify_thread_id = vtid;
-	pte->has_notify_thread_id = true;
-	return 0;
-}
-
-static int encode_posix_timer(struct pstree_item *item, struct posix_timer *v, struct proc_posix_timer *vp,
-			      PosixTimerEntry *pte)
-{
-	pte->it_id = vp->spt.it_id;
-	pte->clock_id = vp->spt.clock_id;
-	pte->si_signo = vp->spt.si_signo;
-	pte->it_sigev_notify = vp->spt.it_sigev_notify;
-	pte->sival_ptr = encode_pointer(vp->spt.sival_ptr);
-
-	pte->overrun = v->overrun;
-
-	pte->isec = v->val.it_interval.tv_sec;
-	pte->insec = v->val.it_interval.tv_nsec;
-	pte->vsec = v->val.it_value.tv_sec;
-	pte->vnsec = v->val.it_value.tv_nsec;
-
-	if (encode_notify_thread_id(vp->spt.notify_thread_id, item, pte))
-		return -1;
-
-	return 0;
-}
-
-int parasite_dump_posix_timers_seized(struct proc_posix_timers_stat *proc_args, struct parasite_ctl *ctl,
-				      struct pstree_item *item)
-{
-	CoreEntry *core = item->core[0];
-	TaskTimersEntry *tte = core->tc->timers;
-	PosixTimerEntry *pte;
-	struct proc_posix_timer *temp;
-	struct parasite_dump_posix_timers_args *args;
-	int ret, exit_code = -1;
-	int args_size;
-	int i;
-
-	if (core_alloc_posix_timers(tte, proc_args->timer_n, &pte))
-		return -1;
-
-	args_size = posix_timers_dump_size(proc_args->timer_n);
-	args = compel_parasite_args_s(ctl, args_size);
-	args->timer_n = proc_args->timer_n;
-
-	i = 0;
-	list_for_each_entry(temp, &proc_args->timers, list) {
-		args->timer[i].it_id = temp->spt.it_id;
-		i++;
-	}
-
-	ret = compel_rpc_call_sync(PARASITE_CMD_DUMP_POSIX_TIMERS, ctl);
-	if (ret < 0)
-		goto end_posix;
-
-	i = 0;
-	list_for_each_entry(temp, &proc_args->timers, list) {
-		posix_timer_entry__init(&pte[i]);
-		if (encode_posix_timer(item, &args->timer[i], temp, &pte[i]))
-			goto end_posix;
-		tte->posix[i] = &pte[i];
-		i++;
-	}
-
-	exit_code = 0;
-end_posix:
-	free_posix_timers(proc_args);
-	return exit_code;
 }
 
 int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_misc *misc)
@@ -435,6 +235,7 @@ int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_mis
 	struct parasite_dump_misc *ma;
 
 	ma = compel_parasite_args(ctl, struct parasite_dump_misc);
+	ma->has_membarrier_get_registrations = kdat.has_membarrier_get_registrations;
 	if (compel_rpc_call_sync(PARASITE_CMD_DUMP_MISC, ctl) < 0)
 		return -1;
 
@@ -513,6 +314,7 @@ int parasite_dump_cgroup(struct parasite_ctl *ctl, struct parasite_dump_cgroup_a
 	struct parasite_dump_cgroup_args *ca;
 
 	ca = compel_parasite_args(ctl, struct parasite_dump_cgroup_args);
+	memcpy(ca->thread_cgrp, cgroup->thread_cgrp, sizeof(ca->thread_cgrp));
 	ret = compel_rpc_call_sync(PARASITE_CMD_DUMP_CGROUP, ctl);
 	if (ret) {
 		pr_err("Parasite failed to dump /proc/self/cgroup\n");
@@ -619,7 +421,7 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct pstree_item *item,
 		ictx->flags |= INFECT_NO_MEMFD;
 	if (fault_injected(FI_PARASITE_CONNECT))
 		ictx->flags |= INFECT_FAIL_CONNECT;
-	if (fault_injected(FI_NO_BREAKPOINTS))
+	if (fault_injected(FI_NO_BREAKPOINTS) || !kdat.has_breakpoints)
 		ictx->flags |= INFECT_NO_BREAKPOINTS;
 	if (kdat.compat_cr)
 		ictx->flags |= INFECT_COMPATIBLE;
diff --git a/criu/pidfd-store.c b/criu/pidfd-store.c
index b15568e08..110f7802a 100644
--- a/criu/pidfd-store.c
+++ b/criu/pidfd-store.c
@@ -13,6 +13,7 @@
 #include "log.h"
 #include "util.h"
 #include "pidfd-store.h"
+#include "sockets.h"
 
 struct pidfd_entry {
 	pid_t pid;
@@ -94,13 +95,11 @@ int init_pidfd_store_sk(pid_t pid, int sk)
 	 * This is similar to how fdstore_init() works.
 	 */
 	if (addrlen == sizeof(sa_family_t)) {
-		if (setsockopt(pidfd_store_sk, SOL_SOCKET, SO_SNDBUFFORCE, &buf[0], sizeof(buf[0])) < 0 ||
-		    setsockopt(pidfd_store_sk, SOL_SOCKET, SO_RCVBUFFORCE, &buf[1], sizeof(buf[1])) < 0) {
-			pr_perror("Unable to set SO_SNDBUFFORCE/SO_RCVBUFFORCE");
+		if (sk_setbufs(pidfd_store_sk, buf)) {
 			goto err;
 		}
 
-		addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-pidfd-store-%d-%d-%" PRIx64, pid, sk,
+		addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-pidfd-store-%d-%d-%s", pid, sk,
 				   criu_run_id);
 		addrlen += sizeof(addr.sun_family);
 
diff --git a/criu/pidfd.c b/criu/pidfd.c
new file mode 100644
index 000000000..ae32025b0
--- /dev/null
+++ b/criu/pidfd.c
@@ -0,0 +1,305 @@
+#include "common/lock.h"
+#include "imgset.h"
+#include "pidfd.h"
+#include "fdinfo.h"
+#include "pidfd.pb-c.h"
+#include "protobuf.h"
+#include "pstree.h"
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include "common/bug.h"
+#include "rst-malloc.h"
+
+#include "compel/plugins/std/syscall-codes.h"
+
+#undef LOG_PREFIX
+#define LOG_PREFIX "pidfd: "
+
+#ifndef PIDFD_THREAD
+#define PIDFD_THREAD O_EXCL
+#endif
+
+struct pidfd_info {
+	PidfdEntry *pidfe; /* image entry, assigned in collect_one_pidfd() */
+	struct file_desc d;
+
+	struct dead_pidfd *dead; /* group sharing pidfe->ino; NULL when nspid != -1 */
+	struct pidfd_info *next; /* next member of dead->list */
+};
+
+struct dead_pidfd {
+	unsigned int ino; /* lookup key, copied from PidfdEntry.ino */
+	int creator_id; /* lowest pidfe->id collected for this ino */
+
+	struct hlist_node hash; /* link in dead_pidfd_hash[] */
+	struct pidfd_info *list; /* members, chained through pidfd_info.next */
+};
+
+#define DEAD_PIDFD_HASH_SIZE 32
+static struct hlist_head dead_pidfd_hash[DEAD_PIDFD_HASH_SIZE];
+
+void init_dead_pidfd_hash(void)
+{
+	for (int bucket = 0; bucket != DEAD_PIDFD_HASH_SIZE; ++bucket) /* every chain starts empty */
+		INIT_HLIST_HEAD(&dead_pidfd_hash[bucket]);
+}
+
+static struct dead_pidfd *lookup_dead_pidfd(unsigned int ino)
+{
+	struct hlist_head *bucket = &dead_pidfd_hash[ino % DEAD_PIDFD_HASH_SIZE];
+	struct dead_pidfd *cur, *found = NULL;
+
+	/* Scan the bucket selected by ino; the first entry with that ino wins. */
+	hlist_for_each_entry(cur, bucket, hash) {
+		if (cur->ino != ino)
+			continue;
+		found = cur;
+		break;
+	}
+	return found;
+}
+
+int is_pidfd_link(char *link)
+{
+	/*
+	 * pidfs was introduced in Linux 6.9; before that, pidfds were
+	 * anonymous inodes, whose "[pidfd]" link name is what is matched here.
+	 */
+	return is_anon_link_type(link, "[pidfd]");
+}
+
+static void pr_info_pidfd(char *action, PidfdEntry *pidfe)
+{
+	/* One log line per entry, prefixed with the caller-supplied action. */
+	pr_info("%s: id %#08x flags %u NSpid %d ino %u\n", action, pidfe->id, pidfe->flags, pidfe->nspid,
+		pidfe->ino);
+}
+
+static int dump_one_pidfd(int pidfd, u32 id, const struct fd_parms *p)
+{
+	struct pidfd_dump_info pidfd_info = {.pidfe = PIDFD_ENTRY__INIT};
+	FileEntry fe = FILE_ENTRY__INIT;
+	PidfdEntry *pe = &pidfd_info.pidfe;
+
+	if (parse_fdinfo(pidfd, FD_TYPES__PIDFD, &pidfd_info))
+		return -1;
+
+	if (p->flags & PIDFD_THREAD) {
+		pr_err("PIDFD_THREAD flag is currently not supported\n");
+		return -1;
+	}
+
+	/*
+	 * A pidfd with a known pid must point into the dumped process tree,
+	 * otherwise its target would not exist when the pidfd is restored.
+	 */
+	if (pidfd_info.pid != -1 && !pstree_item_by_real(pidfd_info.pid)) {
+		pr_err("pidfd pid %d is not a part of process tree..\n", pidfd_info.pid);
+		return -1;
+	}
+
+	pe->id = id;
+	pe->flags = (p->flags & ~O_RDWR);
+	pe->fown = (FownEntry *)&p->fown;
+
+	fe.type = FD_TYPES__PIDFD;
+	fe.id = id;
+	fe.pidfd = pe;
+
+	pr_info_pidfd("Dumping", pe);
+	return pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE);
+}
+
+const struct fdtype_ops pidfd_dump_ops = {
+	.type = FD_TYPES__PIDFD,
+	.dump = dump_one_pidfd, /* writes a FileEntry wrapping the PidfdEntry to CR_FD_FILES */
+};
+
+static int pidfd_open(pid_t pid, int flags)
+{
+	return syscall(__NR_pidfd_open, pid, flags); /* invoked by syscall number */
+}
+
+static int create_tmp_process(void)
+{
+	int child = fork();
+
+	if (!child) {
+		/* Child: idle until it is killed. */
+		for (;;)
+			sleep(1);
+	}
+	if (child < 0)
+		pr_perror("Could not fork");
+	return child < 0 ? -1 : child;
+}
+
+/* SIGKILL the temporary process and reap it; SIGCHLD stays blocked meanwhile. */
+static int kill_helper(pid_t pid)
+{
+	int status, ret = -1;
+	sigset_t blockmask, oldmask;
+
+	/*
+	 * Block SIGCHLD to prevent interfering from sigchld_handler()
+	 * and to properly handle the tmp process termination without
+	 * a race condition. A similar approach is used in cr_system().
+	 */
+	sigemptyset(&oldmask);
+	sigemptyset(&blockmask);
+	sigaddset(&blockmask, SIGCHLD);
+	if (sigprocmask(SIG_BLOCK, &blockmask, &oldmask) == -1) {
+		pr_perror("Cannot set mask of blocked signals");
+		return -1;
+	}
+
+	if (kill(pid, SIGKILL) < 0) {
+		pr_perror("Could not kill temporary process with pid: %d", pid);
+		goto restore_mask;
+	}
+
+	if (waitpid(pid, &status, 0) != pid) {
+		pr_perror("Could not wait on temporary process with pid: %d", pid);
+		goto restore_mask;
+	}
+
+	if (!WIFSIGNALED(status)) {
+		pr_err("Expected temporary process to be terminated by a signal\n");
+		goto restore_mask;
+	}
+
+	if (WTERMSIG(status) != SIGKILL) {
+		pr_err("Expected temporary process to be terminated by SIGKILL\n");
+		goto restore_mask;
+	}
+
+	ret = 0;
+restore_mask:
+	/* Undo the SIGCHLD block on every path, failures included. */
+	if (sigprocmask(SIG_SETMASK, &oldmask, NULL) == -1) {
+		pr_perror("Cannot clear blocked signals");
+		ret = -1;
+	}
+	return ret;
+}
+
+/* Open the pidfd described by d; entries without an NSpid are served from a short-lived helper. */
+static int open_one_pidfd(struct file_desc *d, int *new_fd)
+{
+	struct pidfd_info *info, *child;
+	struct dead_pidfd *dead = NULL;
+	pid_t pid;
+	int pidfd = -1; /* stays negative until an fd is actually held here */
+
+	info = container_of(d, struct pidfd_info, d);
+	if (info->pidfe->nspid != -1) {
+		pidfd = pidfd_open(info->pidfe->nspid, info->pidfe->flags);
+		if (pidfd < 0) {
+			pr_perror("Could not open pidfd for %d", info->pidfe->nspid);
+			goto err_close;
+		}
+		goto out;
+	}
+
+	dead = lookup_dead_pidfd(info->pidfe->ino);
+	BUG_ON(!dead);
+
+	if (info->dead && info->dead->creator_id != info->pidfe->id) {
+		int ret = recv_desc_from_peer(&info->d, &pidfd);
+		if (ret != 0) {
+			if (ret != 1)
+				pr_err("Can't get fd\n");
+			return ret;
+		}
+		goto out;
+	}
+
+	pid = create_tmp_process();
+	if (pid < 0)
+		goto err_close;
+
+	for (child = dead->list; child; child = child->next) {
+		if (child == info)
+			continue;
+		pidfd = pidfd_open(pid, child->pidfe->flags);
+		if (pidfd < 0) {
+			/* nspid is -1 for every entry on this list, so log the helper pid */
+			pr_perror("Could not open pidfd for %d", pid);
+			goto err_close;
+		}
+		if (send_desc_to_peer(pidfd, &child->d)) {
+			pr_perror("Can't send file descriptor");
+			goto err_close;
+		}
+		close(pidfd);
+	}
+
+	pidfd = pidfd_open(pid, info->pidfe->flags);
+	if (pidfd < 0) {
+		pr_perror("Could not open pidfd for %d", pid);
+		goto err_close;
+	}
+	if (kill_helper(pid))
+		goto err_close;
+out:
+	if (rst_file_params(pidfd, info->pidfe->fown, info->pidfe->flags))
+		goto err_close;
+
+	*new_fd = pidfd;
+	return 0;
+err_close:
+	if (pidfd >= 0)
+		close(pidfd);
+	pr_err("Can't create pidfd %#08x NSpid: %d flags: %u\n",
+	   info->pidfe->id, info->pidfe->nspid, info->pidfe->flags);
+	return -1;
+}
+
+static struct file_desc_ops pidfd_desc_ops = {
+	.type = FD_TYPES__PIDFD,
+	.open = open_one_pidfd /* ops are attached per entry by collect_one_pidfd() */
+};
+
+static int collect_one_pidfd(void *obj, ProtobufCMessage *msg, struct cr_img *i)
+{
+	struct pidfd_info *info = obj;
+	struct dead_pidfd *grp;
+	PidfdEntry *pe = pb_msg(msg, PidfdEntry);
+
+	info->pidfe = pe;
+	info->dead = NULL;
+	pr_info_pidfd("Collected ", pe);
+
+	/* Entries with nspid == -1 are grouped by inode in dead_pidfd_hash. */
+	if (pe->nspid == -1) {
+		grp = lookup_dead_pidfd(pe->ino);
+		if (!grp) {
+			grp = xmalloc(sizeof(*grp));
+			if (!grp) {
+				pr_err("Could not allocate memory..\n");
+				return -1;
+			}
+			INIT_HLIST_NODE(&grp->hash);
+			grp->list = NULL;
+			grp->ino = pe->ino;
+			grp->creator_id = pe->id;
+			hlist_add_head(&grp->hash, &dead_pidfd_hash[grp->ino % DEAD_PIDFD_HASH_SIZE]);
+		}
+
+		/* Prepend to the group's list; the smallest id ends up as creator. */
+		info->dead = grp;
+		info->next = grp->list;
+		grp->list = info;
+		if (grp->creator_id > pe->id)
+			grp->creator_id = pe->id;
+	}
+
+	return file_desc_add(&info->d, pe->id, &pidfd_desc_ops);
+}
+
+struct collect_image_info pidfd_cinfo = {
+	.fd_type = CR_FD_PIDFD,
+	.pb_type = PB_PIDFD,
+	.priv_size = sizeof(struct pidfd_info), /* collect_one_pidfd() treats obj as this type */
+	.collect = collect_one_pidfd,
+};
diff --git a/criu/pie/Makefile b/criu/pie/Makefile
index 265dcf82b..60c7f1e94 100644
--- a/criu/pie/Makefile
+++ b/criu/pie/Makefile
@@ -18,6 +18,15 @@ ifeq ($(ARCH),mips)
 	ccflags-y	+= -mno-abicalls -fno-pic
 endif
 
+# -mshstk required for CET instructions
+ifeq ($(ARCH),x86)
+	ccflags-y	+= -mshstk
+endif
+
+ifeq ($(ARCH),riscv64)
+	ccflags-y	+= -fno-stack-protector
+endif
+
 LDS		:= compel/arch/$(ARCH)/scripts/compel-pack.lds.S
 
 restorer-obj-y	+= parasite-vdso.o ./$(ARCH_DIR)/vdso-pie.o
@@ -38,6 +47,10 @@ ifeq ($(ARCH),ppc64)
         restorer-obj-y	+= ./$(ARCH_DIR)/vdso-trampoline.o
 endif
 
+ifeq ($(ARCH),riscv64)
+        restorer-obj-y	+= ./$(ARCH_DIR)/vdso-lookup.o
+endif
+
 define gen-pie-rules
 $(1)-obj-y	+= $(1).o
 $(1)-obj-e	+= pie.lib.a
diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library
index da2a2fab3..d96a7ac32 100644
--- a/criu/pie/Makefile.library
+++ b/criu/pie/Makefile.library
@@ -27,3 +27,7 @@ CFLAGS		+= $(CFLAGS_PIE)
 ifeq ($(ARCH),mips)
 CFLAGS			+= -fno-stack-protector -DCR_NOGLIBC -mno-abicalls -fno-pic
 endif
+
+ifeq ($(ARCH),riscv64)
+	ccflags-y	+= -fno-stack-protector
+endif
\ No newline at end of file
diff --git a/criu/pie/parasite-vdso.c b/criu/pie/parasite-vdso.c
index 355007fa9..f3ad3107f 100644
--- a/criu/pie/parasite-vdso.c
+++ b/criu/pie/parasite-vdso.c
@@ -45,6 +45,7 @@ static int remap_one(char *who, unsigned long *from, unsigned long to, size_t si
 static int park_at(struct vdso_maps *rt, unsigned long vdso, unsigned long vvar)
 {
 	unsigned long vvar_size = rt->sym.vvar_size;
+	unsigned long vvar_vclock_size = rt->sym.vvar_vclock_size;
 	unsigned long vdso_size = rt->sym.vdso_size;
 	int ret;
 
@@ -54,8 +55,24 @@ static int park_at(struct vdso_maps *rt, unsigned long vdso, unsigned long vvar)
 
 	std_log_set_gettimeofday(NULL); /* stop using vdso for timings */
 
-	if (vvar)
+	if (vvar) {
+		/*
+		 * v6.13-rc1~172^2~9 splits the vvar vma in two parts vvar and
+		 * vvar_clock. The last one is mapped right after the first
+		 * one.
+		 */
+		if (vvar_vclock_size) {
+			unsigned long from;
+
+			vvar_size -= vvar_vclock_size;
+			from = rt->vvar_start + vvar_size;
+
+			ret = remap_one("rt-vvar", &from, vvar + vvar_size, vvar_vclock_size);
+			if (ret)
+				return ret;
+		}
 		ret = remap_one("rt-vvar", &rt->vvar_start, vvar, vvar_size);
+	}
 
 	if (!ret)
 		vdso_update_gtod_addr(rt);
diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c
index e7eb1fcb6..c966e9e62 100644
--- a/criu/pie/parasite.c
+++ b/criu/pie/parasite.c
@@ -3,7 +3,6 @@
 #include <signal.h>
 #include <linux/limits.h>
 #include <linux/capability.h>
-#include <sys/mount.h>
 #include <stdarg.h>
 #include <sys/ioctl.h>
 #include <sys/uio.h>
@@ -14,6 +13,7 @@
 #include "int.h"
 #include "types.h"
 #include <compel/plugins/std/syscall.h>
+#include "linux/mount.h"
 #include "parasite.h"
 #include "fcntl.h"
 #include "prctl.h"
@@ -101,7 +101,7 @@ static int dump_pages(struct parasite_dump_pages_args *args)
 	}
 	if (spliced_bytes != args->nr_pages * PAGE_SIZE) {
 		sys_close(p);
-		pr_err("Can't splice all pages to pipe (%ld/%d)\n", spliced_bytes, args->nr_pages);
+		pr_err("Can't splice all pages to pipe (%ld/%ld)\n", spliced_bytes, args->nr_pages);
 		return -1;
 	}
 
@@ -211,6 +211,63 @@ out:
 	return ret;
 }
 
+/*
+ * Returns a membarrier() registration command (it is a bitmask) if the process
+ * was registered for specified (as a bit index) membarrier()-issuing command;
+ * returns zero otherwise.
+ */
+static int get_membarrier_registration_mask(int cmd_bit)
+{
+	unsigned cmd = 1 << cmd_bit;
+	int rc = sys_membarrier(cmd, 0, 0);
+	int benign = rc == -EPERM || rc == -EINVAL || rc == -ENOSYS;
+
+	/*
+	 * The barrier succeeds only for a registered command type. EPERM
+	 * (not registered) and EINVAL/ENOSYS (no kernel support) are the
+	 * expected refusals; any other error is reported as a failure.
+	 */
+	if (rc && !benign) {
+		pr_err("membarrier(1 << %d) returned %d\n", cmd_bit, rc);
+		return -1;
+	}
+	pr_debug("membarrier(1 << %d) returned %d\n", cmd_bit, rc);
+
+	/* MEMBARRIER_CMD_REGISTER_xxx == MEMBARRIER_CMD_xxx << 1 (uapi linux/membarrier.h). */
+	if (rc)
+		return 0;
+	return cmd << 1;
+}
+
+/*
+ * It would be better to check the following with BUILD_BUG_ON, but we might
+ * have an old linux/membarrier.h header without necessary enum values.
+ */
+#define MEMBARRIER_CMDBIT_PRIVATE_EXPEDITED	      3
+#define MEMBARRIER_CMDBIT_PRIVATE_EXPEDITED_SYNC_CORE 5
+#define MEMBARRIER_CMDBIT_PRIVATE_EXPEDITED_RSEQ      7
+#define MEMBARRIER_CMDBIT_GET_REGISTRATIONS	      9
+
+static int dump_membarrier_compat(int *membarrier_registration_mask)
+{
+	int bits;
+
+	/* Probe each private expedited command and OR its result into the mask. */
+	*membarrier_registration_mask = 0;
+
+	if ((bits = get_membarrier_registration_mask(MEMBARRIER_CMDBIT_PRIVATE_EXPEDITED)) < 0)
+		return -1;
+	*membarrier_registration_mask |= bits;
+	if ((bits = get_membarrier_registration_mask(MEMBARRIER_CMDBIT_PRIVATE_EXPEDITED_SYNC_CORE)) < 0)
+		return -1;
+	*membarrier_registration_mask |= bits;
+	if ((bits = get_membarrier_registration_mask(MEMBARRIER_CMDBIT_PRIVATE_EXPEDITED_RSEQ)) < 0)
+		return -1;
+	*membarrier_registration_mask |= bits;
+
+	return 0;
+}
+
 static int dump_misc(struct parasite_dump_misc *args)
 {
 	int ret;
@@ -225,6 +282,19 @@ static int dump_misc(struct parasite_dump_misc *args)
 	args->dumpable = sys_prctl(PR_GET_DUMPABLE, 0, 0, 0, 0);
 	args->thp_disabled = sys_prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0);
 
+	if (args->has_membarrier_get_registrations) {
+		ret = sys_membarrier(1 << MEMBARRIER_CMDBIT_GET_REGISTRATIONS, 0, 0);
+		if (ret < 0) {
+			pr_err("membarrier(1 << %d) returned %d\n", MEMBARRIER_CMDBIT_GET_REGISTRATIONS, ret);
+			return -1;
+		}
+		args->membarrier_registration_mask = ret;
+	} else {
+		ret = dump_membarrier_compat(&args->membarrier_registration_mask);
+		if (ret)
+			return ret;
+	}
+
 	ret = sys_prctl(PR_GET_CHILD_SUBREAPER, (unsigned long)&args->child_subreaper, 0, 0, 0);
 	if (ret)
 		pr_err("PR_GET_CHILD_SUBREAPER failed (%d)\n", ret);
@@ -254,6 +324,7 @@ static int dump_creds(struct parasite_dump_creds *args)
 		args->cap_prm[i] = data[i].prm;
 		args->cap_inh[i] = data[i].inh;
 		args->cap_bnd[i] = 0;
+		args->cap_amb[i] = 0;
 
 		for (j = 0; j < 32; j++) {
 			if (j + i * 32 > args->cap_last_cap)
@@ -266,8 +337,21 @@ static int dump_creds(struct parasite_dump_creds *args)
 			if (ret)
 				args->cap_bnd[i] |= (1 << j);
 		}
+
+		for (j = 0; j < 32; j++) {
+			if (j + i * 32 > args->cap_last_cap)
+				break;
+			ret = sys_prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, j + i * 32, 0, 0);
+			if (ret < 0) {
+				pr_err("Unable to read ambient capability %d: %d\n", j + i * 32, ret);
+				return -1;
+			}
+			if (ret)
+				args->cap_amb[i] |= (1 << j);
+		}
 	}
 
+	args->no_new_privs = sys_prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
 	args->secbits = sys_prctl(PR_GET_SECUREBITS, 0, 0, 0, 0);
 
 	ret = sys_getgroups(0, NULL);
@@ -745,7 +829,7 @@ static int parasite_dump_cgroup(struct parasite_dump_cgroup_args *args)
 		return -1;
 	}
 
-	cgroup = sys_openat(proc, "self/cgroup", O_RDONLY, 0);
+	cgroup = sys_openat(proc, args->thread_cgrp, O_RDONLY, 0);
 	sys_close(proc);
 	if (cgroup < 0) {
 		pr_err("can't get /proc/self/cgroup fd\n");
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index f80b68359..0a8aba41b 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -17,6 +17,7 @@
 #include <sys/resource.h>
 #include <signal.h>
 #include <sys/inotify.h>
+#include <sys/socket.h>
 
 #include "linux/userfaultfd.h"
 
@@ -27,6 +28,7 @@
 #include <compel/plugins/std/syscall.h>
 #include <compel/plugins/std/log.h>
 #include <compel/ksigset.h>
+#include "mman.h"
 #include "signal.h"
 #include "prctl.h"
 #include "criu-log.h"
@@ -48,7 +50,17 @@
 #include "images/inventory.pb-c.h"
 
 #include "shmem.h"
-#include "restorer.h"
+
+/*
+ * sys_getgroups() buffer size. Not too much, to avoid stack overflow.
+ */
+#define MAX_GETGROUPS_CHECKED (512 / sizeof(unsigned int))
+
+/*
+ * Memory overhead limit for reading VMA when auto_dedup is enabled.
+ * An arbitrarily chosen trade-off point between speed and memory usage.
+ */
+#define AUTO_DEDUP_OVERHEAD_BYTES (128 << 20)
 
 #ifndef PR_SET_PDEATHSIG
 #define PR_SET_PDEATHSIG 1
@@ -66,6 +78,10 @@
 #define FALLOC_FL_PUNCH_HOLE 0x02
 #endif
 
+#ifndef ARCH_RT_SIGRETURN_RST
+#define ARCH_RT_SIGRETURN_RST ARCH_RT_SIGRETURN
+#endif
+
 #define sys_prctl_safe(opcode, val1, val2, val3)                                \
 	({                                                                      \
 		long __ret = sys_prctl(opcode, val1, val2, val3, 0);            \
@@ -92,7 +108,7 @@ bool fault_injected(enum faults f)
  * Hint: compel on aarch64 shall learn relocs for that.
  */
 static unsigned __page_size;
-unsigned page_size(void)
+unsigned long page_size(void)
 {
 	return __page_size;
 }
@@ -184,37 +200,58 @@ static int lsm_set_label(char *label, char *type, int procfd)
 	return 0;
 }
 
-static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_type)
+static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_type, uid_t uid)
 {
 	CredsEntry *ce = &args->creds;
 	int b, i, ret;
 	struct cap_header hdr;
 	struct cap_data data[_LINUX_CAPABILITY_U32S_3];
-
-	/*
-	 * We're still root here and thus can do it without failures.
-	 */
+	int ruid, euid, suid, fsuid;
+	int rgid, egid, sgid, fsgid;
 
 	/*
 	 * Setup supplementary group IDs early.
 	 */
 	if (args->groups) {
-		ret = sys_setgroups(ce->n_groups, args->groups);
-		if (ret) {
-			pr_err("Can't setup supplementary group IDs: %d\n", ret);
-			return -1;
+		/*
+		 * We may be in an unprivileged user namespace where setgroups
+		 * is disabled.  If the current list of groups is already what
+		 * we want, skip the call to setgroups.
+		 */
+		unsigned int gids[MAX_GETGROUPS_CHECKED];
+		int n = sys_getgroups(MAX_GETGROUPS_CHECKED, gids);
+		if (n != ce->n_groups || memcmp(gids, args->groups, n * sizeof(*gids))) {
+			ret = sys_setgroups(ce->n_groups, args->groups);
+			if (ret) {
+				pr_err("Can't setgroups([%zu gids]): %d\n", ce->n_groups, ret);
+				return -1;
+			}
 		}
 	}
 
+	/*
+	 * Compare xids with current values. If all match then we can skip
+	 * setting them (which requires extra capabilities).
+	 */
+	fsuid = sys_setfsuid(-1);
+	fsgid = sys_setfsgid(-1);
+	if (sys_getresuid(&ruid, &euid, &suid) == 0 && sys_getresgid(&rgid, &egid, &sgid) == 0 && ruid == ce->uid &&
+	    euid == ce->euid && suid == ce->suid && rgid == ce->gid && egid == ce->egid && sgid == ce->sgid &&
+	    fsuid == ce->fsuid && fsgid == ce->fsgid) {
+		goto skip_xids;
+	}
+
 	/*
 	 * First -- set the SECURE_NO_SETUID_FIXUP bit not to
 	 * lose caps bits when changing xids.
 	 */
 
-	ret = sys_prctl(PR_SET_SECUREBITS, 1 << SECURE_NO_SETUID_FIXUP, 0, 0, 0);
-	if (ret) {
-		pr_err("Unable to set SECURE_NO_SETUID_FIXUP: %d\n", ret);
-		return -1;
+	if (!uid) {
+		ret = sys_prctl(PR_SET_SECUREBITS, 1 << SECURE_NO_SETUID_FIXUP, 0, 0, 0);
+		if (ret) {
+			pr_err("Unable to set SECURE_NO_SETUID_FIXUP: %d\n", ret);
+			return -1;
+		}
 	}
 
 	/*
@@ -247,15 +284,18 @@ static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_typ
 		return -1;
 	}
 
+skip_xids:
 	/*
 	 * Third -- restore securebits. We don't need them in any
 	 * special state any longer.
 	 */
 
-	ret = sys_prctl(PR_SET_SECUREBITS, ce->secbits, 0, 0, 0);
-	if (ret) {
-		pr_err("Unable to set PR_SET_SECUREBITS: %d\n", ret);
-		return -1;
+	if (sys_prctl(PR_GET_SECUREBITS, 0, 0, 0, 0) != ce->secbits) {
+		ret = sys_prctl(PR_SET_SECUREBITS, ce->secbits, 0, 0, 0);
+		if (ret) {
+			pr_err("Unable to set PR_SET_SECUREBITS: %d\n", ret);
+			return -1;
+		}
 	}
 
 	/*
@@ -271,10 +311,18 @@ static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_typ
 				/* already set */
 				continue;
 			ret = sys_prctl(PR_CAPBSET_DROP, i + b * 32, 0, 0, 0);
-			if (ret) {
+			if (!ret)
+				continue;
+			if (!ce->has_no_new_privs || !ce->no_new_privs || args->cap_prm[b] & (1 << i)) {
 				pr_err("Unable to drop capability %d: %d\n", i + b * 32, ret);
 				return -1;
 			}
+			/*
+			 * If prctl(NO_NEW_PRIVS) is going to be set then it
+			 * will prevent inheriting the capabilities not in
+			 * the permitted set.
+			 */
+			pr_warn("Unable to drop capability %d from bset: %d (but NO_NEW_PRIVS will drop it)\n", i + b * 32, ret);
 		}
 	}
 
@@ -300,6 +348,22 @@ static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_typ
 		return -1;
 	}
 
+	for (b = 0; b < CR_CAP_SIZE; b++) {
+		for (i = 0; i < 32; i++) {
+			if (b * 32 + i > args->cap_last_cap)
+				break;
+			if ((args->cap_amb[b] & (1 << i)) == 0)
+				/* don't set */
+				continue;
+			ret = sys_prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i + b * 32, 0, 0);
+			if (!ret)
+				continue;
+			pr_err("Unable to raise ambient capability %d: %d\n", i + b * 32, ret);
+			return -1;
+		}
+	}
+
+
 	if (lsm_type != LSMTYPE__SELINUX) {
 		/*
 		 * SELinux does not support setting the process context for
@@ -315,6 +379,14 @@ static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_typ
 	if (lsm_set_label(args->lsm_sockcreate, "sockcreate", procfd) < 0)
 		return -1;
 
+	if (ce->has_no_new_privs && ce->no_new_privs) {
+		ret = sys_prctl(PR_SET_NO_NEW_PRIVS, ce->no_new_privs, 0, 0, 0);
+		if (ret) {
+			pr_err("Unable to set no_new_privs=%d: %d\n", ce->no_new_privs, ret);
+			return -1;
+		}
+	}
+
 	return 0;
 }
 
@@ -579,14 +651,99 @@ static int restore_thread_common(struct thread_restore_args *args)
 
 static void noinline rst_sigreturn(unsigned long new_sp, struct rt_sigframe *sigframe)
 {
-	ARCH_RT_SIGRETURN(new_sp, sigframe);
+	ARCH_RT_SIGRETURN_RST(new_sp, sigframe);
+}
+
+/*
+ * Send cg_set to cgroupd over sk with our tid in SCM_CREDENTIALS; returns 0 or -1.
+ */
+static int send_cg_set(int sk, int cg_set)
+{
+	struct cmsghdr *ch;
+	struct msghdr h = {}; /* zeroed: msg_name, msg_flags and any libc padding stay defined */
+	/*
+	 * 0th is the dummy call address for compatibility with userns helper
+	 * 1st is the cg_set
+	 */
+	struct iovec iov[2];
+	char cmsg[CMSG_SPACE(sizeof(struct ucred))] = {};
+	int ret, *dummy = NULL;
+	struct ucred *ucred;
+
+	iov[0].iov_base = &dummy;
+	iov[0].iov_len = sizeof(dummy);
+	iov[1].iov_base = &cg_set;
+	iov[1].iov_len = sizeof(cg_set);
+
+	h.msg_iov = iov;
+	h.msg_iovlen = sizeof(iov) / sizeof(struct iovec);
+
+	h.msg_control = cmsg;
+	h.msg_controllen = sizeof(cmsg);
+	ch = CMSG_FIRSTHDR(&h);
+	ch->cmsg_len = CMSG_LEN(sizeof(struct ucred));
+	ch->cmsg_level = SOL_SOCKET;
+	ch->cmsg_type = SCM_CREDENTIALS;
+
+	ucred = (struct ucred *)CMSG_DATA(ch);
+	/*
+	 * We still have privilege in this namespace so we can send
+	 * thread id instead of pid of main thread, uid, gid as 0
+	 * since these 2 are ignored in cgroupd
+	 */
+	ucred->pid = sys_gettid();
+	ucred->uid = 0;
+	ucred->gid = 0;
+
+	ret = sys_sendmsg(sk, &h, 0);
+	if (ret < 0) {
+		pr_err("Unable to send packet to cgroupd %d\n", ret);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * As the cgroupd socket is shared among threads and processes, this
+ * should be called with task_entries->cgroupd_sync_lock held.
+ */
+static int recv_cg_set_restore_ack(int sk)
+{
+	char ctl[CMSG_SPACE(sizeof(struct ucred))];
+	struct msghdr mh = {};
+	struct ucred *peer;
+	int rc;
+
+	/* Only control data is read back: no iovec is supplied. */
+	mh.msg_control = ctl;
+	mh.msg_controllen = sizeof(ctl);
+
+	rc = sys_recvmsg(sk, &mh, 0);
+	if (rc < 0) {
+		pr_err("Unable to receive from cgroupd %d\n", rc);
+		return -1;
+	}
+
+	if (mh.msg_controllen != sizeof(ctl)) {
+		pr_err("The message from cgroupd is truncated\n");
+		return -1;
+	}
+
+	/* The credentials in the ack must name the calling thread. */
+	peer = (struct ucred *)CMSG_DATA(CMSG_FIRSTHDR(&mh));
+	if (peer->pid == sys_gettid())
+		return 0;
+
+	pr_err("cred pid %d != gettid\n", peer->pid);
+	return -1;
+}
 
 /*
  * Threads restoration via sigreturn. Note it's locked
  * routine and calls for unlock at the end.
  */
-long __export_restore_thread(struct thread_restore_args *args)
+__visible long __export_restore_thread(struct thread_restore_args *args)
 {
 	struct rt_sigframe *rt_sigframe;
 	k_rtsigset_t to_block;
@@ -599,6 +756,10 @@ long __export_restore_thread(struct thread_restore_args *args)
 		goto core_restore_end;
 	}
 
+	/* restore original shadow stack */
+	if (arch_shstk_restore(&args->shstk))
+		goto core_restore_end;
+
 	/* All signals must be handled by thread leader */
 	ksigfillset(&to_block);
 	ret = sys_sigprocmask(SIG_SETMASK, &to_block, NULL, sizeof(k_rtsigset_t));
@@ -609,6 +770,24 @@ long __export_restore_thread(struct thread_restore_args *args)
 
 	rt_sigframe = (void *)&args->mz->rt_sigframe;
 
+	if (args->cg_set != -1) {
+		int err = 0;
+
+		mutex_lock(&task_entries_local->cgroupd_sync_lock);
+
+		pr_info("Restore cg_set in thread cg_set: %d\n", args->cg_set);
+
+		err = send_cg_set(args->cgroupd_sk, args->cg_set);
+		if (!err)
+			err = recv_cg_set_restore_ack(args->cgroupd_sk);
+
+		mutex_unlock(&task_entries_local->cgroupd_sync_lock);
+		sys_close(args->cgroupd_sk);
+
+		if (err)
+			goto core_restore_end;
+	}
+
 	if (restore_thread_common(args))
 		goto core_restore_end;
 
@@ -634,7 +813,7 @@ long __export_restore_thread(struct thread_restore_args *args)
 	if (restore_seccomp(args))
 		BUG();
 
-	ret = restore_creds(args->creds_args, args->ta->proc_fd, args->ta->lsm_type);
+	ret = restore_creds(args->creds_args, args->ta->proc_fd, args->ta->lsm_type, args->ta->uid);
 	ret = ret || restore_dumpable_flag(&args->ta->mm);
 	ret = ret || restore_pdeath_sig(args);
 	if (ret)
@@ -933,6 +1112,23 @@ static int vma_remap(VmaEntry *vma_entry, int uffd)
 
 	pr_info("Remap %lx->%lx len %lx\n", src, dst, len);
 
+	/*
+	 * SHSTK VMAs are a bit special: we create the shstk vma right in
+	 * shstk_vma_restore() and populate it with contents from a premapped VMA
+	 * (which in turn is just a normal anonymous VMA!). Then, we munmap() this
+	 * premapped VMA. Afterwards, we need to adjust vma_premmaped_start(vma_entry)
+	 * to point to the created shstk vma and treat it as a premapped one in vma_remap().
+	 */
+	if (vma_entry_is(vma_entry, VMA_AREA_SHSTK)) {
+		if (shstk_vma_restore(vma_entry)) {
+			pr_err("Unable to prepare shadow stack vma for remap %lx -> %lx\n", src, dst);
+			return -1;
+		}
+
+		/* shstk_vma_restore() modifies vma premapped address */
+		src = vma_premmaped_start(vma_entry);
+	}
+
 	if (src - dst < len)
 		guard = dst;
 	else if (dst - src < len)
@@ -957,7 +1153,7 @@ static int vma_remap(VmaEntry *vma_entry, int uffd)
 		 * |G|----tgt----|       |
 		 *
 		 * 3. remap src to any other place.
-		 *    G prevents src from being remaped on tgt again
+		 *    G prevents src from being remapped on tgt again
 		 * |       |-------------| -> |+++++src+++++|
 		 * |G|---tgt-----|                          |
 		 *
@@ -1057,9 +1253,23 @@ static int timerfd_arm(struct task_restore_args *args)
 
 static int create_posix_timers(struct task_restore_args *args)
 {
-	int ret, i;
-	kernel_timer_t next_id;
+	int ret, i, exit_code = -1;
+	kernel_timer_t next_id = 0, timer_id;
 	struct sigevent sev;
+	bool create_restore_ids = false;
+
+	if (!args->posix_timers_n)
+		return 0;
+
+	/* prctl returns EINVAL if PR_TIMER_CREATE_RESTORE_IDS isn't supported. */
+	ret = sys_prctl(PR_TIMER_CREATE_RESTORE_IDS,
+			PR_TIMER_CREATE_RESTORE_IDS_ON, 0, 0, 0);
+	if (ret == 0) {
+		create_restore_ids = true;
+	} else if (ret != -EINVAL) {
+		pr_err("Can't enabled PR_TIMER_CREATE_RESTORE_IDS: %d\n", ret);
+		return -1;
+	}
 
 	for (i = 0; i < args->posix_timers_n; i++) {
 		sev.sigev_notify = args->posix_timers[i].spt.it_sigev_notify;
@@ -1071,30 +1281,61 @@ static int create_posix_timers(struct task_restore_args *args)
 #endif
 		sev.sigev_value.sival_ptr = args->posix_timers[i].spt.sival_ptr;
 
+		if (create_restore_ids) {
+			/*
+			 * With enabled PR_TIMER_CREATE_RESTORE_IDS, the
+			 * timer_create syscall creates a new timer with the
+			 * specified ID.
+			 */
+			timer_id = args->posix_timers[i].spt.it_id;
+			ret = sys_timer_create(args->posix_timers[i].spt.clock_id, &sev, &timer_id);
+			if (ret < 0) {
+				pr_err("Can't create posix timer - %d: %d\n", i, ret);
+				goto out;
+			}
+			if (timer_id != args->posix_timers[i].spt.it_id) {
+				pr_err("Unexpected timer id %u (expected %lu)\n",
+				       timer_id, args->posix_timers[i].spt.it_id);
+				goto out;
+			}
+			continue;
+		}
+
 		while (1) {
-			ret = sys_timer_create(args->posix_timers[i].spt.clock_id, &sev, &next_id);
+			ret = sys_timer_create(args->posix_timers[i].spt.clock_id, &sev, &timer_id);
 			if (ret < 0) {
 				pr_err("Can't create posix timer - %d\n", i);
-				return ret;
+				goto out;
 			}
 
-			if (next_id == args->posix_timers[i].spt.it_id)
+			if (timer_id != next_id) {
+				pr_err("Can't create timers, kernel don't give them consequently\n");
+				goto out;
+			}
+			next_id++;
+
+			if (timer_id == args->posix_timers[i].spt.it_id)
 				break;
 
-			ret = sys_timer_delete(next_id);
+			ret = sys_timer_delete(timer_id);
 			if (ret < 0) {
-				pr_err("Can't remove temporaty posix timer 0x%x\n", next_id);
-				return ret;
-			}
-
-			if ((long)next_id > args->posix_timers[i].spt.it_id) {
-				pr_err("Can't create timers, kernel don't give them consequently\n");
-				return -1;
+				pr_err("Can't remove temporaty posix timer 0x%x\n", timer_id);
+				goto out;
 			}
 		}
 	}
 
-	return 0;
+	exit_code = 0;
+out:
+	if (create_restore_ids) {
+		ret = sys_prctl(PR_TIMER_CREATE_RESTORE_IDS,
+				PR_TIMER_CREATE_RESTORE_IDS_OFF, 0, 0, 0);
+		if (ret != 0) {
+			pr_err("Can't disable PR_TIMER_CREATE_RESTORE_IDS: %d\n", ret);
+			exit_code = -1;
+		}
+	}
+	return exit_code;
 }
 
 static void restore_posix_timers(struct task_restore_args *args)
@@ -1117,18 +1358,24 @@ unsigned long vdso_rt_size = 0;
 void *bootstrap_start = NULL;
 unsigned int bootstrap_len = 0;
 
-void __export_unmap(void)
+__visible void __export_unmap(void)
 {
 	sys_munmap(bootstrap_start, bootstrap_len - vdso_rt_size);
 }
 
-static void unregister_libc_rseq(struct rst_rseq_param *rseq)
+static int unregister_libc_rseq(struct rst_rseq_param *rseq)
 {
-	if (!rseq->rseq_abi_pointer)
-		return;
+	long ret;
 
-	/* can't fail if rseq is registered */
-	sys_rseq(decode_pointer(rseq->rseq_abi_pointer), rseq->rseq_abi_size, 1, rseq->signature);
+	if (!rseq->rseq_abi_pointer)
+		return 0;
+
+	ret = sys_rseq(decode_pointer(rseq->rseq_abi_pointer), rseq->rseq_abi_size, 1, rseq->signature);
+	if (ret) {
+		pr_err("Failed to unregister libc rseq %ld\n", ret);
+		return -1;
+	}
+	return 0;
 }
 
 /*
@@ -1324,6 +1571,40 @@ static int fd_poll(int inotify_fd)
 	return sys_ppoll(&pfd, 1, &tmo, NULL, sizeof(sigset_t));
 }
 
+/*
+ * Call preadv() reading at most `max_to_read` bytes (0 = no limit); a clipped iov_len is restored after the call.
+ */
+static ssize_t preadv_limited(int fd, struct iovec *iovs, int nr, off_t offs, size_t max_to_read)
+{
+	size_t saved_last_iov_len = 0;
+	ssize_t ret;
+
+	if (max_to_read) {
+		for (int i = 0; i < nr; ++i) {
+			if (iovs[i].iov_len <= max_to_read) {
+				max_to_read -= iovs[i].iov_len;
+				continue;
+			}
+
+			if (!max_to_read) {
+				nr = i;
+				break;
+			}
+
+			saved_last_iov_len = iovs[i].iov_len;
+			iovs[i].iov_len = max_to_read;
+			nr = i + 1;
+			break;
+		}
+	}
+
+	ret = sys_preadv(fd, iovs, nr, offs);
+	if (saved_last_iov_len)
+		iovs[nr - 1].iov_len = saved_last_iov_len;
+
+	return ret;
+}
+
 /*
  * In the worst case buf size should be:
  *   sizeof(struct inotify_event) * 2 + PATH_MAX
@@ -1384,6 +1665,54 @@ int cleanup_current_inotify_events(struct task_restore_args *task_args)
 	return 0;
 }
 
+/*
+ * Restore membarrier() registrations: one membarrier(1 << i) call per bit set in @mask; -1 if any call fails.
+ */
+static int restore_membarrier_registrations(int mask)
+{
+	unsigned long bitmap[1] = { mask };
+	int i, err, ret = 0;
+
+	if (!mask)
+		return 0;
+
+	pr_info("Restoring membarrier() registrations %x\n", mask);
+
+	for_each_bit(i, bitmap) {
+		err = sys_membarrier(1 << i, 0, 0);
+		if (!err)
+			continue;
+		pr_err("Can't restore membarrier(1 << %d) registration: %d\n", i, err);
+		ret = -1;
+	}
+
+	return ret;
+}
+
+static int restore_madv_guard_regions(struct task_restore_args *args)
+{
+	int i, ret;
+
+	for (i = 0; i < args->vmas_n; i++) {
+		VmaEntry *vma_entry = args->vmas + i;
+		size_t len;
+
+		if (!vma_entry_is(vma_entry, VMA_AREA_GUARD))
+			continue;
+
+		len = vma_entry->end - vma_entry->start;
+		ret = sys_madvise(vma_entry->start, len, MADV_GUARD_INSTALL);
+		if (ret) {
+			pr_err("madvise(%" PRIx64 ", %zu, MADV_GUARD_INSTALL) "
+			       "failed with %d\n",
+			       vma_entry->start, len, ret);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * The main routine to restore task via sigreturn.
  * This one is very special, we never return there
@@ -1391,7 +1720,7 @@ int cleanup_current_inotify_events(struct task_restore_args *task_args)
  * and jump execution to some predefined ip read from
  * core file.
  */
-long __export_restore_task(struct task_restore_args *args)
+__visible long __export_restore_task(struct task_restore_args *args)
 {
 	long ret = -1;
 	int i;
@@ -1451,6 +1780,9 @@ long __export_restore_task(struct task_restore_args *args)
 		pr_debug("lazy-pages: uffd %d\n", args->uffd);
 	}
 
+	if (arch_shstk_switch_to_restorer(&args->shstk))
+		goto core_restore_end;
+
 	/*
 	 * Park vdso/vvar in a safe place if architecture doesn't support
 	 * mapping them with arch_prctl().
@@ -1477,7 +1809,8 @@ long __export_restore_task(struct task_restore_args *args)
 	 * for instance once the kernel will want to update (struct rseq).cpu_id field:
 	 * https://github.com/torvalds/linux/blob/ce522ba9ef7e/kernel/rseq.c#L89
 	 */
-	unregister_libc_rseq(&args->libc_rseq);
+	if (unregister_libc_rseq(&args->libc_rseq))
+		goto core_restore_end;
 
 	if (unmap_old_vmas((void *)args->premmapped_addr, args->premmapped_len, bootstrap_start, bootstrap_len,
 			   args->task_size))
@@ -1523,17 +1856,13 @@ long __export_restore_task(struct task_restore_args *args)
 			goto core_restore_end;
 	}
 
-	if (args->uffd > -1) {
-		/* re-enable THP if we disabled it previously */
-		if (args->has_thp_enabled) {
-			int ret;
-			ret = sys_prctl(PR_SET_THP_DISABLE, 0, 0, 0, 0);
-			if (ret) {
-				pr_err("Cannot re-enable THP: %d\n", ret);
-				goto core_restore_end;
-			}
-		}
+	ret = sys_prctl(PR_SET_THP_DISABLE, args->thp_disabled, 0, 0, 0);
+	if (ret) {
+		pr_err("Cannot restore THP_DISABLE=%d flag: %ld\n", args->thp_disabled, ret);
+		goto core_restore_end;
+	}
 
+	if (args->uffd > -1) {
 		pr_debug("lazy-pages: closing uffd %d\n", args->uffd);
 		/*
 		 * All userfaultfd configuration has finished at this point.
@@ -1575,7 +1904,12 @@ long __export_restore_task(struct task_restore_args *args)
 
 		while (nr) {
 			pr_debug("Preadv %lx:%d... (%d iovs)\n", (unsigned long)iovs->iov_base, (int)iovs->iov_len, nr);
-			r = sys_preadv(args->vma_ios_fd, iovs, nr, rio->off);
+			/*
+			 * If we're requested to punch holes in the file after reading we do
+			 * it to save memory. Limit the reads then to an arbitrary block size.
+			 */
+			r = preadv_limited(args->vma_ios_fd, iovs, nr, rio->off,
+					   args->auto_dedup ? AUTO_DEDUP_OVERHEAD_BYTES : 0);
 			if (r < 0) {
 				pr_err("Can't read pages data (%d)\n", (int)r);
 				goto core_restore_end;
@@ -1662,6 +1996,9 @@ long __export_restore_task(struct task_restore_args *args)
 
 		for (m = 0; m < sizeof(vma_entry->madv) * 8; m++) {
 			if (vma_entry->madv & (1ul << m)) {
+				if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR)))
+					continue;
+
 				ret = sys_madvise(vma_entry->start, vma_entry_len(vma_entry), m);
 				if (ret) {
 					pr_err("madvise(%" PRIx64 ", %" PRIu64 ", %ld) "
@@ -1673,6 +2010,13 @@ long __export_restore_task(struct task_restore_args *args)
 		}
 	}
 
+	/*
+	 * Restore madvise(MADV_GUARD_INSTALL)
+	 */
+	ret = restore_madv_guard_regions(args);
+	if (ret)
+		goto core_restore_end;
+
 	/*
 	 * Tune up the task fields.
 	 */
@@ -1702,6 +2046,24 @@ long __export_restore_task(struct task_restore_args *args)
 		.exe_fd = args->fd_exe_link,
 	};
 	ret = sys_prctl(PR_SET_MM, PR_SET_MM_MAP, (long)&prctl_map, sizeof(prctl_map), 0);
+	if (ret) {
+		pr_debug("prctl PR_SET_MM_MAP failed with %d\n", (int)ret);
+		pr_debug("  .start_code = %" PRIx64 "\n", prctl_map.start_code);
+		pr_debug("  .end_code = %" PRIx64 "\n", prctl_map.end_code);
+		pr_debug("  .start_data = %" PRIx64 "\n", prctl_map.start_data);
+		pr_debug("  .end_data = %" PRIx64 "\n", prctl_map.end_data);
+		pr_debug("  .start_stack = %" PRIx64 "\n", prctl_map.start_stack);
+		pr_debug("  .start_brk = %" PRIx64 "\n", prctl_map.start_brk);
+		pr_debug("  .brk = %" PRIx64 "\n", prctl_map.brk);
+		pr_debug("  .arg_start = %" PRIx64 "\n", prctl_map.arg_start);
+		pr_debug("  .arg_end = %" PRIx64 "\n", prctl_map.arg_end);
+		pr_debug("  .env_start = %" PRIx64 "\n", prctl_map.env_start);
+		pr_debug("  .env_end = %" PRIx64 "\n", prctl_map.env_end);
+		pr_debug("  .auxv_size = %" PRIu32 "\n", prctl_map.auxv_size);
+		for (i = 0; i < prctl_map.auxv_size / sizeof(uint64_t); i++)
+			pr_debug("  .auxv[%d] = %" PRIx64 "\n", i, prctl_map.auxv[i]);
+		pr_debug("  .exe_fd = %" PRIu32 "\n", prctl_map.exe_fd);
+	}
 	if (ret == -EINVAL) {
 		ret = sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_CODE, (long)args->mm.mm_start_code, 0);
 		ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_CODE, (long)args->mm.mm_end_code, 0);
@@ -1831,6 +2193,7 @@ long __export_restore_task(struct task_restore_args *args)
 			}
 			if (ret != thread_args[i].pid) {
 				pr_err("Unable to create a thread: %ld\n", ret);
+				sys_close(fd);
 				mutex_unlock(&task_entries_local->last_pid_mutex);
 				goto core_restore_end;
 			}
@@ -1855,6 +2218,9 @@ long __export_restore_task(struct task_restore_args *args)
 		goto core_restore_end;
 	}
 
+	if (restore_membarrier_registrations(args->membarrier_registration_mask) < 0)
+		goto core_restore_end;
+
 	pr_info("%ld: Restored\n", sys_getpid());
 
 	restore_finish_stage(task_entries_local, CR_STATE_RESTORE);
@@ -1915,13 +2281,21 @@ long __export_restore_task(struct task_restore_args *args)
 	 * turning off TCP repair is CAP_SYS_NED_ADMIN protected,
 	 * thus restore* creds _after_ all of the above.
 	 */
-	ret = restore_creds(args->t->creds_args, args->proc_fd, args->lsm_type);
+	ret = restore_creds(args->t->creds_args, args->proc_fd, args->lsm_type, args->uid);
 	ret = ret || restore_dumpable_flag(&args->mm);
 	ret = ret || restore_pdeath_sig(args->t);
 	ret = ret || restore_child_subreaper(args->child_subreaper);
 
 	futex_set_and_wake(&thread_inprogress, args->nr_threads);
 
+	/*
+	 * Shadow stack of the leader can be locked only after all other
+	 * threads were cloned, otherwise they may start with read-only
+	 * shadow stack.
+	 */
+	if (arch_shstk_restore(&args->shstk))
+		goto core_restore_end;
+
 	restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS);
 
 	if (ret)
@@ -1938,7 +2312,7 @@ long __export_restore_task(struct task_restore_args *args)
 	 * code below doesn't fail due to bad timing values.
 	 */
 
-#define itimer_armed(args, i) (args->itimers[i].it_interval.tv_sec || args->itimers[i].it_interval.tv_usec)
+#define itimer_armed(args, i) (args->itimers[i].it_value.tv_sec || args->itimers[i].it_value.tv_usec)
 
 	if (itimer_armed(args, 0))
 		sys_setitimer(ITIMER_REAL, &args->itimers[0], NULL);
diff --git a/criu/pie/util-vdso.c b/criu/pie/util-vdso.c
index f1e3239ff..45fb6a648 100644
--- a/criu/pie/util-vdso.c
+++ b/criu/pie/util-vdso.c
@@ -5,6 +5,7 @@
 #include <fcntl.h>
 #include <errno.h>
 #include <stdint.h>
+#include <stdbool.h>
 
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -48,10 +49,25 @@ static bool __ptr_struct_oob(uintptr_t ptr, size_t struct_size, uintptr_t start,
 	return __ptr_oob(ptr, start, size) || __ptr_struct_end_oob(ptr, struct_size, start, size);
 }
 
+/* Local strlen implementation; a NULL @str is treated as an empty string (returns 0) */
+static size_t __strlen(const char *str)
+{
+	const char *ptr;
+
+	if (!str)
+		return 0;
+
+	ptr = str;
+	while (*ptr != '\0')
+		ptr++;
+
+	return ptr - str;
+}
+
 /*
  * Elf hash, see format specification.
  */
-static unsigned long elf_hash(const unsigned char *name)
+static unsigned long elf_sysv_hash(const unsigned char *name)
 {
 	unsigned long h = 0, g;
 
@@ -65,6 +81,15 @@ static unsigned long elf_hash(const unsigned char *name)
 	return h;
 }
 
+/* The GNU hash format (h = h * 33 + c, seeded with 5381). Taken from glibc. */
+static unsigned long elf_gnu_hash(const unsigned char *name)
+{
+	unsigned long h = 5381;
+	for (unsigned char c = *name; c != '\0'; c = *++name)
+		h = h * 33 + c;
+	return h;
+}
+
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 #define BORD ELFDATA2MSB /* 0x02 */
 #else
@@ -73,30 +98,51 @@ static unsigned long elf_hash(const unsigned char *name)
 
 static int has_elf_identity(Ehdr_t *ehdr)
 {
-	/*
-	 * See Elf specification for this magic values.
-	 */
+	/* Validate e_ident: true only if magic, class, data encoding, version and OSABI all match */
+	/* check ELF magic */
+	if (ehdr->e_ident[EI_MAG0] != ELFMAG0 ||
+	    ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
+	    ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
+	    ehdr->e_ident[EI_MAG3] != ELFMAG3) {
+		pr_err("Invalid ELF magic\n");
+		return false;
+	};
+
+	/* check ELF class */
 #if defined(CONFIG_VDSO_32)
-	static const char elf_ident[] = {
-		0x7f, 0x45, 0x4c, 0x46, 0x01, BORD, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	if (ehdr->e_ident[EI_CLASS] != ELFCLASS32) {
+		pr_err("Unsupported ELF class: %d\n", ehdr->e_ident[EI_CLASS]);
+		return false;
 	};
 #else
-	static const char elf_ident[] = {
-		0x7f, 0x45, 0x4c, 0x46, 0x02, BORD, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	if (ehdr->e_ident[EI_CLASS] != ELFCLASS64) {
+		pr_err("Unsupported ELF class: %d\n", ehdr->e_ident[EI_CLASS]);
+		return false;
 	};
 #endif
 
-	BUILD_BUG_ON(sizeof(elf_ident) != sizeof(ehdr->e_ident));
-
-	if (memcmp(ehdr->e_ident, elf_ident, sizeof(elf_ident))) {
-		pr_err("ELF header magic mismatch\n");
+	/* check ELF data encoding: BORD is ELFDATA2MSB on big-endian builds, so don't hard-code LSB */
+	if (ehdr->e_ident[EI_DATA] != BORD) {
+		pr_err("Unsupported ELF data encoding: %d\n", ehdr->e_ident[EI_DATA]);
 		return false;
-	}
+	};
+	/* check ELF version */
+	if (ehdr->e_ident[EI_VERSION] != EV_CURRENT) {
+		pr_err("Unsupported ELF version: %d\n", ehdr->e_ident[EI_VERSION]);
+		return false;
+	};
+	/* check ELF OSABI */
+	if (ehdr->e_ident[EI_OSABI] != ELFOSABI_NONE &&
+	    ehdr->e_ident[EI_OSABI] != ELFOSABI_LINUX) {
+		pr_err("Unsupported OSABI version: %d\n", ehdr->e_ident[EI_OSABI]);
+		return false;
+	};
 
 	return true;
 }
 
-static int parse_elf_phdr(uintptr_t mem, size_t size, Phdr_t **dynamic, Phdr_t **load)
+static int parse_elf_phdr(uintptr_t mem, size_t size,
+			  Phdr_t **dynamic, Phdr_t **load, bool *is_32bit)
 {
 	Ehdr_t *ehdr = (void *)mem;
 	uintptr_t addr;
@@ -111,6 +157,8 @@ static int parse_elf_phdr(uintptr_t mem, size_t size, Phdr_t **dynamic, Phdr_t *
 	if (!has_elf_identity(ehdr))
 		return -EINVAL;
 
+	*is_32bit = ehdr->e_ident[EI_CLASS] != ELFCLASS64;
+
 	addr = mem + ehdr->e_phoff;
 	if (__ptr_oob(addr, mem, size))
 		goto err_oob;
@@ -149,11 +197,14 @@ err_oob:
  * Output parameters are:
  *   @dyn_strtab - address of the symbol table
  *   @dyn_symtab - address of the string table section
- *   @dyn_hash   - address of the symbol hash table
+ *   @dyn_hash     - address of the symbol hash table
+ *   @use_gnu_hash - true if @dyn_hash is DT_GNU_HASH, false if it is DT_HASH
  */
-static int parse_elf_dynamic(uintptr_t mem, size_t size, Phdr_t *dynamic, Dyn_t **dyn_strtab, Dyn_t **dyn_symtab,
-			     Dyn_t **dyn_hash)
+static int parse_elf_dynamic(uintptr_t mem, size_t size, Phdr_t *dynamic,
+			     Dyn_t **dyn_strtab, Dyn_t **dyn_symtab,
+			     Dyn_t **dyn_hash, bool *use_gnu_hash)
 {
+	Dyn_t *dyn_gnu_hash = NULL, *dyn_sysv_hash = NULL;
 	Dyn_t *dyn_syment = NULL;
 	Dyn_t *dyn_strsz = NULL;
 	uintptr_t addr;
@@ -184,16 +235,52 @@ static int parse_elf_dynamic(uintptr_t mem, size_t size, Phdr_t *dynamic, Dyn_t
 			dyn_syment = d;
 			pr_debug("DT_SYMENT: %lx\n", (unsigned long)d->d_un.d_val);
 		} else if (d->d_tag == DT_HASH) {
-			*dyn_hash = d;
+			dyn_sysv_hash = d;
 			pr_debug("DT_HASH: %lx\n", (unsigned long)d->d_un.d_ptr);
+		} else if (d->d_tag == DT_GNU_HASH) {
+			/*
+			 * This is complicated.
+			 *
+			 * Looking at the Linux kernel source, the following can be seen
+			 * regarding which hashing style the VDSO uses on each arch:
+			 *
+			 *     aarch64: not specified (depends on linker, can be
+			 *                             only GNU hash style)
+			 *     arm: --hash-style=sysv
+			 *     loongarch: --hash-style=sysv
+			 *     mips: --hash-style=sysv
+			 *     powerpc: --hash-style=both
+			 *     riscv: --hash-style=both
+			 *     s390: --hash-style=both
+			 *     x86: --hash-style=both
+			 *
+			 * Some architectures are using both hash-styles, that
+			 * is the easiest for CRIU. Some architectures are only
+			 * using the old style (sysv), that is what CRIU supports.
+			 *
+			 * Starting with Linux 6.11, aarch64 unfortunately decided
+			 * to switch from '--hash-style=sysv' to ''. Specifying
+			 * nothing unfortunately may mean GNU hash style only and not
+			 * 'both' (depending on the linker).
+			 */
+			dyn_gnu_hash = d;
+			pr_debug("DT_GNU_HASH: %lx\n", (unsigned long)d->d_un.d_ptr);
 		}
 	}
 
-	if (!*dyn_strtab || !*dyn_symtab || !dyn_strsz || !dyn_syment || !*dyn_hash) {
+	if (!*dyn_strtab || !*dyn_symtab || !dyn_strsz || !dyn_syment ||
+	    (!dyn_gnu_hash && !dyn_sysv_hash)) {
 		pr_err("Not all dynamic entries are present\n");
 		return -EINVAL;
 	}
 
+	/*
+	 * Prefer DT_HASH over DT_GNU_HASH as it's been more tested and
+	 * as a result more stable.
+	 */
+	*use_gnu_hash = !dyn_sysv_hash;
+	*dyn_hash = dyn_sysv_hash ?: dyn_gnu_hash;
+
 	return 0;
 
 err_oob:
@@ -208,60 +295,156 @@ typedef unsigned long Hash_t;
 typedef Word_t Hash_t;
 #endif
 
-static void parse_elf_symbols(uintptr_t mem, size_t size, Phdr_t *load, struct vdso_symtable *t,
-			      uintptr_t dynsymbol_names, Hash_t *hash, Dyn_t *dyn_symtab)
+typedef uint32_t Hash32_t;
+
+static bool elf_symbol_match(uintptr_t mem, size_t size,
+		uintptr_t dynsymbol_names, Sym_t *sym,
+		const char *symbol, const size_t vdso_symbol_length)
+{
+	uintptr_t addr = (uintptr_t)sym;
+	char *name;
+
+	if (__ptr_struct_oob(addr, sizeof(Sym_t), mem, size))
+		return false;
+
+	if (ELF_ST_TYPE(sym->st_info) != STT_FUNC && ELF_ST_BIND(sym->st_info) != STB_GLOBAL)
+		return false;
+
+	addr = dynsymbol_names + sym->st_name;
+	if (__ptr_struct_oob(addr, vdso_symbol_length, mem, size))
+		return false;
+	name = (void *)addr;
+
+	return !std_strncmp(name, symbol, vdso_symbol_length);
+}
+
+
+static unsigned long elf_symbol_lookup(uintptr_t mem, size_t size,
+		const char *symbol, uint32_t symbol_hash, unsigned int sym_off,
+		uintptr_t dynsymbol_names, Dyn_t *dyn_symtab, Phdr_t *load,
+		uint32_t nbucket, uint32_t nchain, void *_bucket, Hash_t *chain,
+		const size_t vdso_symbol_length, bool use_gnu_hash)
+{
+	unsigned int j;
+	uintptr_t addr;
+
+	addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr;
+
+	if (use_gnu_hash) {
+		Hash32_t *h, hash_val, *bucket = _bucket;
+
+		j = bucket[symbol_hash % nbucket];
+		if (j == STN_UNDEF)
+			return 0;
+
+		h = bucket + nbucket + (j - sym_off);
+
+		symbol_hash |= 1;
+		do {
+			Sym_t *sym = (void *)addr + sizeof(Sym_t) * j;
+
+			hash_val = *h++;
+			if ((hash_val | 1) == symbol_hash &&
+			    elf_symbol_match(mem, size, dynsymbol_names, sym,
+					     symbol, vdso_symbol_length))
+				return sym->st_value;
+			j++;
+		} while (!(hash_val & 1));
+	} else {
+		Hash_t *bucket = _bucket;
+
+		j = bucket[symbol_hash % nbucket];
+		if (j == STN_UNDEF)
+			return 0;
+
+		for (; j < nchain && j != STN_UNDEF; j = chain[j]) {
+			Sym_t *sym = (void *)addr + sizeof(Sym_t) * j;
+
+			if (elf_symbol_match(mem, size, dynsymbol_names, sym,
+					     symbol, vdso_symbol_length))
+				return sym->st_value;
+		}
+	}
+	return 0;
+}
+
+static int parse_elf_symbols(uintptr_t mem, size_t size, Phdr_t *load,
+			     struct vdso_symtable *t, uintptr_t dynsymbol_names,
+			     Hash_t *hash, Dyn_t *dyn_symtab, bool use_gnu_hash,
+			     bool is_32bit)
 {
 	ARCH_VDSO_SYMBOLS_LIST
 
 	const char *vdso_symbols[VDSO_SYMBOL_MAX] = { ARCH_VDSO_SYMBOLS };
 	const size_t vdso_symbol_length = sizeof(t->symbols[0].name) - 1;
 
-	Hash_t nbucket, nchain;
-	Hash_t *bucket, *chain;
+	void *bucket = NULL;
+	Hash_t *chain = NULL;
+	uint32_t nbucket, nchain = 0;
 
-	unsigned int i, j, k;
-	uintptr_t addr;
+	unsigned int sym_off = 0;
+	unsigned int i = 0;
 
-	nbucket = hash[0];
-	nchain = hash[1];
-	bucket = &hash[2];
-	chain = &hash[nbucket + 2];
+	unsigned long (*elf_hash)(const unsigned char *);
+
+	if (use_gnu_hash) {
+		uint32_t *gnu_hash = (uint32_t *)hash;
+		uint32_t bloom_sz;
+
+		nbucket = gnu_hash[0];
+		sym_off = gnu_hash[1];
+		bloom_sz = gnu_hash[2];
+		if (is_32bit) {
+			uint32_t *bloom;
+			bloom = (uint32_t *)&gnu_hash[4];
+			bucket = (Hash_t *)(&bloom[bloom_sz]);
+		} else {
+			uint64_t *bloom;
+			bloom = (uint64_t *)&gnu_hash[4];
+			bucket = (Hash_t *)(&bloom[bloom_sz]);
+		}
+		elf_hash = &elf_gnu_hash;
+		pr_debug("nbucket %lx sym_off %lx bloom_sz %lx bucket %lx\n",
+			 (unsigned long)nbucket, (unsigned long)sym_off,
+			 (unsigned long)bloom_sz,
+			 (unsigned long)bucket);
+	} else {
+		nbucket = hash[0];
+		nchain = hash[1];
+		bucket = &hash[2];
+		chain = &hash[nbucket + 2];
+		elf_hash = &elf_sysv_hash;
+		pr_debug("nbucket %lx nchain %lx bucket %lx chain %lx\n",
+			 (unsigned long)nbucket, (unsigned long)nchain,
+			 (unsigned long)bucket, (unsigned long)chain);
+	}
 
-	pr_debug("nbucket %lx nchain %lx bucket %lx chain %lx\n", (long)nbucket, (long)nchain, (unsigned long)bucket,
-		 (unsigned long)chain);
 
 	for (i = 0; i < VDSO_SYMBOL_MAX; i++) {
 		const char *symbol = vdso_symbols[i];
-		k = elf_hash((const unsigned char *)symbol);
+		unsigned long addr, symbol_hash;
+		const size_t symbol_length = __strlen(symbol);
 
-		for (j = bucket[k % nbucket]; j < nchain && j != STN_UNDEF; j = chain[j]) {
-			Sym_t *sym;
-			char *name;
+		symbol_hash = elf_hash((const unsigned char *)symbol);
+		addr = elf_symbol_lookup(mem, size, symbol, symbol_hash,
+				sym_off, dynsymbol_names, dyn_symtab, load,
+				nbucket, nchain, bucket, chain,
+				vdso_symbol_length, use_gnu_hash);
+		pr_debug("symbol %s at address %lx\n", symbol, addr);
+		if (!addr)
+			continue;
 
-			addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr;
-
-			addr += sizeof(Sym_t) * j;
-			if (__ptr_struct_oob(addr, sizeof(Sym_t), mem, size))
-				continue;
-			sym = (void *)addr;
-
-			if (ELF_ST_TYPE(sym->st_info) != STT_FUNC && ELF_ST_BIND(sym->st_info) != STB_GLOBAL)
-				continue;
-
-			addr = dynsymbol_names + sym->st_name;
-			if (__ptr_struct_oob(addr, vdso_symbol_length, mem, size))
-				continue;
-			name = (void *)addr;
-
-			if (std_strncmp(name, symbol, vdso_symbol_length))
-				continue;
-
-			/* XXX: provide strncpy() implementation for PIE */
-			memcpy(t->symbols[i].name, name, vdso_symbol_length);
-			t->symbols[i].offset = (unsigned long)sym->st_value - load->p_vaddr;
-			break;
+		/* XXX: provide strncpy() implementation for PIE */
+		if (symbol_length > vdso_symbol_length) {
+			pr_err("strlen(%s) %zd, only %zd bytes available\n",
+				symbol, symbol_length, vdso_symbol_length);
+			return -EINVAL;
 		}
+		memcpy(t->symbols[i].name, symbol, symbol_length);
+		t->symbols[i].offset = addr - load->p_vaddr;
 	}
+
+	return 0;
 }
 
 int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t)
@@ -271,6 +454,8 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t)
 	Dyn_t *dyn_symtab = NULL;
 	Dyn_t *dyn_hash = NULL;
 	Hash_t *hash = NULL;
+	bool use_gnu_hash;
+	bool is_32bit;
 
 	uintptr_t dynsymbol_names;
 	uintptr_t addr;
@@ -281,7 +466,7 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t)
 	/*
 	 * We need PT_LOAD and PT_DYNAMIC here. Each once.
 	 */
-	ret = parse_elf_phdr(mem, size, &dynamic, &load);
+	ret = parse_elf_phdr(mem, size, &dynamic, &load, &is_32bit);
 	if (ret < 0)
 		return ret;
 	if (!load || !dynamic) {
@@ -296,7 +481,8 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t)
 	 * needed. Note that we're interested in a small set of tags.
 	 */
 
-	ret = parse_elf_dynamic(mem, size, dynamic, &dyn_strtab, &dyn_symtab, &dyn_hash);
+	ret = parse_elf_dynamic(mem, size, dynamic, &dyn_strtab, &dyn_symtab,
+				&dyn_hash, &use_gnu_hash);
 	if (ret < 0)
 		return ret;
 
@@ -310,7 +496,11 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t)
 		goto err_oob;
 	hash = (void *)addr;
 
-	parse_elf_symbols(mem, size, load, t, dynsymbol_names, hash, dyn_symtab);
+	ret = parse_elf_symbols(mem, size, load, t, dynsymbol_names, hash, dyn_symtab,
+				use_gnu_hash, is_32bit);
+
+	if (ret <0)
+		return ret;
 
 	return 0;
 
diff --git a/criu/pipes.c b/criu/pipes.c
index 43ff06e3d..daada8830 100644
--- a/criu/pipes.c
+++ b/criu/pipes.c
@@ -434,7 +434,7 @@ int dump_one_pipe_data(struct pipe_data_dump *pd, int lfd, const struct fd_parms
 	/* steal_pipe has to be able to fit all data from a target pipe */
 	if (fcntl(steal_pipe[1], F_SETPIPE_SZ, pipe_size) < 0) {
 		pr_perror("Unable to set a pipe size");
-		goto err;
+		goto err_close;
 	}
 
 	bytes = tee(lfd, steal_pipe[1], pipe_size, SPLICE_F_NONBLOCK);
diff --git a/criu/plugin.c b/criu/plugin.c
index f3fea2856..f9322a3c2 100644
--- a/criu/plugin.c
+++ b/criu/plugin.c
@@ -57,6 +57,11 @@ static cr_plugin_desc_t *cr_gen_plugin_desc(void *h, char *path)
 	__assign_hook(HANDLE_DEVICE_VMA, "cr_plugin_handle_device_vma");
 	__assign_hook(UPDATE_VMA_MAP, "cr_plugin_update_vma_map");
 	__assign_hook(RESUME_DEVICES_LATE, "cr_plugin_resume_devices_late");
+	__assign_hook(PAUSE_DEVICES, "cr_plugin_pause_devices");
+	__assign_hook(CHECKPOINT_DEVICES, "cr_plugin_checkpoint_devices");
+	__assign_hook(POST_FORKING, "cr_plugin_post_forking");
+	__assign_hook(RESTORE_INIT, "cr_plugin_restore_init");
+	__assign_hook(DUMP_DEVICES_LATE, "cr_plugin_dump_devices_late");
 
 #undef __assign_hook
 
@@ -254,6 +259,17 @@ int cr_plugin_init(int stage)
 			goto err;
 	}
 
+	if (stage == CR_PLUGIN_STAGE__RESTORE) {
+		int ret;
+
+		if (check_inventory_plugins())
+			goto err;
+
+		ret = run_plugins(RESTORE_INIT);
+		if (ret < 0 && ret != -ENOTSUP)
+			goto err;
+	}
+
 	exit_code = 0;
 err:
 	closedir(d);
diff --git a/criu/proc_parse.c b/criu/proc_parse.c
index b3badb6e4..f51f2e801 100644
--- a/criu/proc_parse.c
+++ b/criu/proc_parse.c
@@ -42,10 +42,12 @@
 #include "fault-injection.h"
 #include "memfd.h"
 #include "hugetlb.h"
+#include "pidfd.h"
 
 #include "protobuf.h"
 #include "images/fdinfo.pb-c.h"
 #include "images/mnt.pb-c.h"
+#include "pidfd.pb-c.h"
 #include "plugin.h"
 
 #include <stdlib.h>
@@ -72,6 +74,8 @@ struct buffer {
 
 static struct buffer __buf;
 static char *buf = __buf.buf;
+/* only ever goes from false to true, if at all */
+static bool uprobes_vma_exists = false;
 
 /*
  * This is how AIO ring buffers look like in proc
@@ -118,7 +122,8 @@ bool handle_vma_plugin(int *fd, struct stat *stat)
 	return true;
 }
 
-static void __parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf)
+static void __parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf,
+			    int *shstk)
 {
 	char *tok;
 
@@ -141,6 +146,8 @@ static void __parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf)
 			*flags |= MAP_NORESERVE;
 		else if (_vmflag_match(tok, "ht"))
 			*flags |= MAP_HUGETLB;
+		else if (_vmflag_match(tok, "dp"))
+			*flags |= MAP_DROPPABLE;
 
 		/* madvise() block */
 		if (_vmflag_match(tok, "sr"))
@@ -157,11 +164,16 @@ static void __parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf)
 			*madv |= (1ul << MADV_HUGEPAGE);
 		else if (_vmflag_match(tok, "nh"))
 			*madv |= (1ul << MADV_NOHUGEPAGE);
+		else if (_vmflag_match(tok, "wf"))
+			*madv |= (1ul << MADV_WIPEONFORK);
 
 		/* vmsplice doesn't work for VM_IO and VM_PFNMAP mappings. */
 		if (_vmflag_match(tok, "io") || _vmflag_match(tok, "pf"))
 			*io_pf = 1;
 
+		if (_vmflag_match(tok, "ss"))
+			*shstk = 1;
+
 		/*
 		 * Anything else is just ignored.
 		 */
@@ -172,25 +184,49 @@ static void __parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf)
 
 void parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf)
 {
-	__parse_vmflags(buf, flags, madv, io_pf);
+	int shstk = 0;
+
+	__parse_vmflags(buf, flags, madv, io_pf, &shstk);
 }
 
 static void parse_vma_vmflags(char *buf, struct vma_area *vma_area)
 {
 	int io_pf = 0;
+	int shstk = 0;
 
-	__parse_vmflags(buf, &vma_area->e->flags, &vma_area->e->madv, &io_pf);
+	__parse_vmflags(buf, &vma_area->e->flags, &vma_area->e->madv, &io_pf,
+			&shstk);
+
+	if (shstk)
+		vma_area->e->status |= VMA_AREA_SHSTK;
 
 	/*
 	 * vmsplice doesn't work for VM_IO and VM_PFNMAP mappings, the
 	 * only exception is VVAR area that mapped by the kernel as
 	 * VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP
+	 *
+	 * The uprobes vma is also mapped by the kernel with VM_IO, among other flags
 	 */
-	if (io_pf && !vma_area_is(vma_area, VMA_AREA_VVAR) && !vma_entry_is(vma_area->e, VMA_FILE_SHARED))
+	if (io_pf && !vma_area_is(vma_area, VMA_AREA_VVAR) && !vma_entry_is(vma_area->e, VMA_FILE_SHARED)
+		  && !vma_area_is(vma_area, VMA_AREA_UPROBES))
 		vma_area->e->status |= VMA_UNSUPP;
 
 	if (vma_area->e->madv)
 		vma_area->e->has_madv = true;
+
+	/*
+	 * We set the MAP_PRIVATE flag on vma_area->e->flags right after parsing
+	 * the first line of a VMA entry in the /proc/<pid>/smaps file:
+	 * 7fa84fa70000-7fa84fa95000 rw-p 00000000 00:00 0
+	 * but that is too early: we can't distinguish between MAP_DROPPABLE
+	 * and MAP_PRIVATE mappings yet, as both are private mappings in nature
+	 * and at this point we haven't yet read the "VmFlags:" line in smaps.
+	 *
+	 * Let's detect this situation and drop the MAP_PRIVATE flag while keeping
+	 * MAP_DROPPABLE, otherwise restorer's restore_mapping() helper will fail.
+	 */
+	if ((vma_area->e->flags & MAP_PRIVATE) && (vma_area->e->flags & MAP_DROPPABLE))
+		vma_area->e->flags &= ~MAP_PRIVATE;
 }
 
 static inline int is_anon_shmem_map(dev_t dev)
@@ -313,25 +349,8 @@ static int vma_get_mapfile_user(const char *fname, struct vma_area *vma, struct
 
 	vfi_dev = makedev(vfi->dev_maj, vfi->dev_min);
 
-	if (is_memfd(vfi_dev)) {
-		char tmp[PATH_MAX];
-		strlcpy(tmp, fname, PATH_MAX);
-		strip_deleted(tmp, strlen(tmp));
-
-		/*
-		 * The error EPERM will be shown in the following pr_perror().
-		 * It comes from the previous open() call.
-		 */
-		pr_perror("Can't open mapped [%s]", tmp);
-
-		/*
-		 * TODO Perhaps we could do better than failing and dump the
-		 * memory like what is being done in shmem.c
-		 */
-		return -1;
-	}
-
 	if (is_hugetlb_dev(vfi_dev, &hugetlb_flag) || is_anon_shmem_map(vfi_dev)) {
+		vma->e->status |= VMA_AREA_REGULAR;
 		if (!(vma->e->flags & MAP_SHARED))
 			vma->e->status |= VMA_ANON_PRIVATE;
 		else
@@ -359,18 +378,44 @@ static int vma_get_mapfile_user(const char *fname, struct vma_area *vma, struct
 	}
 
 	if (vma_stat(vma, fd)) {
-		close(fd);
-		return -1;
+		goto closefd;
 	}
 
-	if (vma->vmst->st_dev != vfi_dev || vma->vmst->st_ino != vfi->ino) {
-		pr_err("Failed to resolve mapping %lx filename\n", (unsigned long)vma->e->start);
-		close(fd);
-		return -1;
+	if (vma->vmst->st_ino != vfi->ino) {
+		goto errmsg;
+	}
+
+	/*
+	 * If devices don't match it could be because file is on a btrfs subvolume,
+	 * which means that device number returned by stat will not match what is
+	 * seen in smaps and other places. To deal with that we need a more involved
+	 * check.
+	 */
+	if (vma->vmst->st_dev != vfi_dev) {
+		int mnt_id;
+		struct ns_id *ns;
+
+		if (get_fd_mntid(fd, &mnt_id))
+			goto errmsg;
+
+		ns = lookup_nsid_by_mnt_id(mnt_id);
+		if (!ns)
+			goto errmsg;
+
+		if (!phys_stat_dev_match(vma->vmst->st_dev, vfi_dev, ns, fname))
+			goto errmsg;
+
+		vma->mnt_id = mnt_id;
 	}
 
 	*vm_file_fd = fd;
 	return 0;
+
+errmsg:
+	pr_err("Failed to resolve mapping %lx filename\n", (unsigned long)vma->e->start);
+closefd:
+	close(fd);
+	return -1;
 }
 
 static int vma_get_mapfile(const char *fname, struct vma_area *vma, DIR *mfd, struct vma_file_info *vfi,
@@ -557,11 +602,20 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat
 	} else if (!strcmp(file_path, "[vdso]")) {
 		if (handle_vdso_vma(vma_area))
 			goto err;
-	} else if (!strcmp(file_path, "[vvar]")) {
+	} else if (!strcmp(file_path, "[vvar]") ||
+		   !strcmp(file_path, "[vvar_vclock]")) {
 		if (handle_vvar_vma(vma_area))
 			goto err;
 	} else if (!strcmp(file_path, "[heap]")) {
 		vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_HEAP;
+	} else if (!strcmp(file_path, "[uprobes]")) {
+		uprobes_vma_exists = true;
+		if (!opts.allow_uprobes) {
+			pr_err("PID %d has uprobes vma. Consider using --" OPT_ALLOW_UPROBES ".\n",
+				pid);
+			goto err;
+		}
+		vma_area->e->status |= VMA_AREA_UPROBES;
 	} else {
 		vma_area->e->status = VMA_AREA_REGULAR;
 	}
@@ -620,17 +674,16 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat
 			pr_info("path: %s\n", file_path);
 			vma_area->e->status |= VMA_AREA_SYSVIPC;
 		} else {
-			/* Dump shmem dev, hugetlb dev (private and share) mappings the same way as memfd
-			 * when possible.
+			/* Dump memfd-backed mappings and anonymous shared mappings (both
+			 * normal and hugepage) using the memfd approach when possible.
 			 */
 			if (is_memfd(st_buf->st_dev) || is_anon_shmem_map(st_buf->st_dev) ||
-			    (kdat.has_memfd_hugetlb && is_hugetlb_dev(st_buf->st_dev, &hugetlb_flag))) {
+			    can_dump_with_memfd_hugetlb(st_buf->st_dev, &hugetlb_flag, file_path, vma_area)) {
 				vma_area->e->status |= VMA_AREA_MEMFD;
 				vma_area->e->flags |= hugetlb_flag;
 				if (fault_injected(FI_HUGE_ANON_SHMEM_ID))
 					vma_area->e->shmid += FI_HUGE_ANON_SHMEM_ID_BASE;
 			} else if (is_hugetlb_dev(st_buf->st_dev, &hugetlb_flag)) {
-				/* hugetlb mapping but memfd does not support HUGETLB */
 				vma_area->e->flags |= hugetlb_flag;
 				vma_area->e->flags |= MAP_ANONYMOUS;
 
@@ -699,6 +752,10 @@ static int vma_list_add(struct vma_area *vma_area, struct vm_area_list *vma_area
 		 */
 		pr_debug("Device file mapping %016" PRIx64 "-%016" PRIx64 " supported via device plugins\n",
 			 vma_area->e->start, vma_area->e->end);
+	} else if (vma_area->e->status & VMA_AREA_UPROBES) {
+		pr_debug("Skipping uprobes vma %016" PRIx64 "-%016" PRIx64 "\n", vma_area->e->start,
+		         vma_area->e->end);
+		return 0;
 	} else if (vma_area->e->status & VMA_UNSUPP) {
 		pr_err("Unsupported mapping found %016" PRIx64 "-%016" PRIx64 "\n", vma_area->e->start,
 		       vma_area->e->end);
@@ -750,7 +807,7 @@ static int task_size_check(pid_t pid, VmaEntry *entry)
 
 int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, dump_filemap_t dump_filemap)
 {
-	struct vma_area *vma_area = NULL;
+	struct vma_area *vma_area = NULL, *prev_vma_area = NULL;
 	unsigned long start, end, pgoff, prev_end = 0;
 	char r, w, x, s;
 	int ret = -1, vm_file_fd = -1;
@@ -792,8 +849,22 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, dump_filemap_t du
 				continue;
 		}
 
-		if (vma_area && vma_list_add(vma_area, vma_area_list, &prev_end, &vfi, &prev_vfi))
-			goto err;
+		if (vma_area && vma_area_is(vma_area, VMA_AREA_VVAR) &&
+		    prev_vma_area && vma_area_is(prev_vma_area, VMA_AREA_VVAR)) {
+			if (prev_vma_area->e->end != vma_area->e->start) {
+				pr_err("two nonconsecutive vvar vma-s: "
+				       "%" PRIx64 "-%" PRIx64 " %" PRIx64 "-%" PRIx64 "\n",
+				       prev_vma_area->e->start, prev_vma_area->e->end,
+				       vma_area->e->start, vma_area->e->end);
+				goto err;
+			}
+			/* Merge all vvar vma-s into one. */
+			prev_vma_area->e->end = vma_area->e->end;
+		} else {
+			if (vma_area && vma_list_add(vma_area, vma_area_list, &prev_end, &vfi, &prev_vfi))
+				goto err;
+			prev_vma_area = vma_area;
+		}
 
 		if (eof)
 			break;
@@ -833,6 +904,7 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, dump_filemap_t du
 			goto err;
 		}
 
+		pr_debug("Handling VMA with the following smaps entry: %s\n", str);
 		if (handle_vma(pid, vma_area, str + path_off, map_files_dir, &vfi, &prev_vfi, &vm_file_fd))
 			goto err;
 
@@ -891,7 +963,7 @@ int parse_pid_stat(pid_t pid, struct proc_pid_stat *s)
 	*tok = '\0';
 	*p = '\0';
 
-	strlcpy(s->comm, tok + 1, sizeof(s->comm));
+	__strlcpy(s->comm, tok + 1, sizeof(s->comm));
 
 	n = sscanf(p + 1,
 		   " %c %d %d %d %d %d %u %lu %lu %lu %lu "
@@ -1028,12 +1100,13 @@ int parse_pid_status(pid_t pid, struct seize_task_status *ss, void *data)
 
 	cr->s.sigpnd = 0;
 	cr->s.shdpnd = 0;
+	cr->s.sigblk = 0;
 	cr->s.seccomp_mode = SECCOMP_MODE_DISABLED;
 
 	if (bfdopenr(&f))
 		return -1;
 
-	while (done < 13) {
+	while (done < 14) {
 		str = breadline(&f);
 		if (str == NULL)
 			break;
@@ -1117,6 +1190,13 @@ int parse_pid_status(pid_t pid, struct seize_task_status *ss, void *data)
 			continue;
 		}
 
+		if (!strncmp(str, "CapAmb:", 7)) {
+			if (cap_parse(str + 8, cr->cap_amb))
+				goto err_parse;
+			done++;
+			continue;
+		}
+
 		if (!strncmp(str, "Seccomp:", 8)) {
 			if (sscanf(str + 9, "%d", &cr->s.seccomp_mode) != 1) {
 				goto err_parse;
@@ -1144,13 +1224,23 @@ int parse_pid_status(pid_t pid, struct seize_task_status *ss, void *data)
 				goto err_parse;
 			cr->s.sigpnd |= sigpnd;
 
+			done++;
+			continue;
+		}
+		if (!strncmp(str, "SigBlk:", 7)) {
+			unsigned long long sigblk = 0;
+
+			if (sscanf(str + 7, "%llx", &sigblk) != 1)
+				goto err_parse;
+			cr->s.sigblk |= sigblk;
+
 			done++;
 			continue;
 		}
 	}
 
 	/* seccomp and nspids are optional */
-	expected_done = (parsed_seccomp ? 11 : 10);
+	expected_done = (parsed_seccomp ? 13 : 12);
 	if (kdat.has_nspid)
 		expected_done++;
 	if (done == expected_done)
@@ -1387,7 +1477,7 @@ static int parse_mountinfo_ent(char *str, struct mount_info *new, char **fsname)
 		goto err;
 
 	new->mountpoint[0] = '.';
-	ret = sscanf(str, "%i %i %u:%u %ms %s %ms %n", &new->mnt_id, &new->parent_mnt_id, &kmaj, &kmin, &new->root,
+	ret = sscanf(str, "%i %i %u:%u %ms %4094s %ms %n", &new->mnt_id, &new->parent_mnt_id, &kmaj, &kmin, &new->root,
 		     new->mountpoint + 1, &opt, &n);
 	if (ret != 7)
 		goto err;
@@ -1952,10 +2042,7 @@ static int parse_fdinfo_pid_s(int pid, int fd, int type, void *arg)
 				     " pos:%lli ino:%lx sdev:%x",
 				     &e->tfd, &e->events, (long long *)&e->data, (long long *)&e->pos,
 				     (long *)&e->inode, &e->dev);
-			if (ret < 3 || ret > 6) {
-				eventpoll_tfd_entry__free_unpacked(e, NULL);
-				goto parse_err;
-			} else if (ret == 3) {
+			if (ret == 3) {
 				e->has_dev = false;
 				e->has_inode = false;
 				e->has_pos = false;
@@ -1963,7 +2050,7 @@ static int parse_fdinfo_pid_s(int pid, int fd, int type, void *arg)
 				e->has_dev = true;
 				e->has_inode = true;
 				e->has_pos = true;
-			} else if (ret < 6) {
+			} else {
 				eventpoll_tfd_entry__free_unpacked(e, NULL);
 				goto parse_err;
 			}
@@ -2137,6 +2224,33 @@ static int parse_fdinfo_pid_s(int pid, int fd, int type, void *arg)
 			if (ret)
 				goto parse_err;
 
+			entry_met = true;
+			continue;
+		}
+		if (fdinfo_field(str, "ino") || fdinfo_field(str, "NSpid") || fdinfo_field(str, "Pid")) {
+			struct pidfd_dump_info *pidfd_info = arg;
+
+			if (type != FD_TYPES__PIDFD)
+				continue;
+
+			if (fdinfo_field(str, "ino")) {
+				ret = sscanf(str, "%*s %u", &pidfd_info->pidfe.ino);
+				if (ret != 1)
+					goto parse_err;
+			} else if (fdinfo_field(str, "Pid")) {
+				ret = sscanf(str, "%*s %d", &pidfd_info->pid);
+				if (ret != 1)
+					goto parse_err;
+			} else if (fdinfo_field(str, "NSpid")) {
+				char *last;
+
+				last = strrchr(str, '\t');
+				if (!last || sscanf(last, "%d", &pidfd_info->pidfe.nspid) != 1) {
+					pr_err("Unable to parse: %s\n", str);
+					goto parse_err;
+				}
+			}
+
 			entry_met = true;
 			continue;
 		}
@@ -2188,10 +2302,10 @@ static int parse_file_lock_buf(char *buf, struct file_lock *fl, bool is_blocked)
 	char fl_flag[10], fl_type[15], fl_option[10];
 
 	if (is_blocked) {
-		num = sscanf(buf, "%lld: -> %s %s %s %d %x:%x:%ld %lld %s", &fl->fl_id, fl_flag, fl_type, fl_option,
+		num = sscanf(buf, "%lld: -> %9s %14s %9s %d %x:%x:%ld %lld %31s", &fl->fl_id, fl_flag, fl_type, fl_option,
 			     &fl->fl_owner, &fl->maj, &fl->min, &fl->i_no, &fl->start, fl->end);
 	} else {
-		num = sscanf(buf, "%lld:%s %s %s %d %x:%x:%ld %lld %s", &fl->fl_id, fl_flag, fl_type, fl_option,
+		num = sscanf(buf, "%lld:%9s %14s %9s %d %x:%x:%ld %lld %31s", &fl->fl_id, fl_flag, fl_type, fl_option,
 			     &fl->fl_owner, &fl->maj, &fl->min, &fl->i_no, &fl->start, fl->end);
 	}
 
@@ -2539,7 +2653,8 @@ err:
 	return -1;
 }
 
-int parse_task_cgroup(int pid, struct parasite_dump_cgroup_args *args, struct list_head *retl, unsigned int *n)
+int parse_thread_cgroup(int pid, int tid, struct parasite_dump_cgroup_args *args, struct list_head *retl,
+			unsigned int *n)
 {
 	FILE *f;
 	int ret;
@@ -2547,7 +2662,7 @@ int parse_task_cgroup(int pid, struct parasite_dump_cgroup_args *args, struct li
 	unsigned int n_internal = 0;
 	struct cg_ctl *intern, *ext;
 
-	f = fopen_proc(pid, "cgroup");
+	f = fopen_proc(pid, "task/%d/cgroup", tid);
 	if (!f)
 		return -1;
 
@@ -2831,3 +2946,8 @@ int parse_uptime(uint64_t *upt)
 	fclose(f);
 	return 0;
 }
+
+bool found_uprobes_vma(void)
+{
+	return uprobes_vma_exists;
+}
diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c
index ff16b9f5b..e0dbfccc2 100644
--- a/criu/protobuf-desc.c
+++ b/criu/protobuf-desc.c
@@ -68,6 +68,7 @@
 #include "images/bpfmap-file.pb-c.h"
 #include "images/bpfmap-data.pb-c.h"
 #include "images/apparmor.pb-c.h"
+#include "images/pidfd.pb-c.h"
 
 struct cr_pb_message_desc cr_pb_descs[PB_MAX];
 
diff --git a/criu/pstree.c b/criu/pstree.c
index f4d77b3a4..cee8b5741 100644
--- a/criu/pstree.c
+++ b/criu/pstree.c
@@ -63,6 +63,7 @@ CoreEntry *core_entry_alloc(int th, int tsk)
 		sz += CR_CAP_SIZE * sizeof(ce->cap_prm[0]);
 		sz += CR_CAP_SIZE * sizeof(ce->cap_eff[0]);
 		sz += CR_CAP_SIZE * sizeof(ce->cap_bnd[0]);
+		sz += CR_CAP_SIZE * sizeof(ce->cap_amb[0]);
 		/*
 		 * @groups are dynamic and allocated
 		 * on demand.
@@ -122,10 +123,12 @@ CoreEntry *core_entry_alloc(int th, int tsk)
 			ce->n_cap_prm = CR_CAP_SIZE;
 			ce->n_cap_eff = CR_CAP_SIZE;
 			ce->n_cap_bnd = CR_CAP_SIZE;
+			ce->n_cap_amb = CR_CAP_SIZE;
 			ce->cap_inh = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_inh[0]));
 			ce->cap_prm = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_prm[0]));
 			ce->cap_eff = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_eff[0]));
 			ce->cap_bnd = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_bnd[0]));
+			ce->cap_amb = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_amb[0]));
 
 			if (arch_alloc_thread_info(core)) {
 				xfree(core);
@@ -179,7 +182,7 @@ void free_pstree(struct pstree_item *root_item)
 	struct pstree_item *item = root_item, *parent;
 
 	while (item) {
-		if (!list_empty(&item->children)) {
+		if (has_children(item)) {
 			item = list_first_entry(&item->children, struct pstree_item, sibling);
 			continue;
 		}
@@ -222,6 +225,7 @@ struct pstree_item *__alloc_pstree_item(bool rst)
 	item->pid->ns[0].virt = -1;
 	item->pid->real = -1;
 	item->pid->state = TASK_UNDEF;
+	item->pid->stop_signo = -1;
 	item->born_sid = -1;
 	item->pid->item = item;
 	futex_init(&item->task_st);
@@ -233,17 +237,21 @@ int init_pstree_helper(struct pstree_item *ret)
 {
 	BUG_ON(!ret->parent);
 	ret->pid->state = TASK_HELPER;
-	rsti(ret)->clone_flags = CLONE_FILES | CLONE_FS;
-	if (shared_fdt_prepare(ret) < 0)
-		return -1;
+	rsti(ret)->clone_flags = 0;
+	INIT_LIST_HEAD(&rsti(ret)->fds);
 	task_entries->nr_helpers++;
 	return 0;
 }
 
+bool has_children(struct pstree_item *item)
+{
+	return !list_empty(&item->children);
+}
+
 /* Deep first search on children */
 struct pstree_item *pstree_item_next(struct pstree_item *item)
 {
-	if (!list_empty(&item->children))
+	if (has_children(item))
 		return list_first_entry(&item->children, struct pstree_item, sibling);
 
 	while (item->parent) {
@@ -380,20 +388,26 @@ static int prepare_pstree_for_shell_job(pid_t pid)
 		}
 
 		for_each_pstree_item(pi) {
+			if (pi->sid == current_sid) {
+				pr_err("Current sid %d intersects with sid of (%d) in images\n", current_sid, vpid(pi));
+				return -1;
+			}
 			if (pi->sid == old_sid)
 				pi->sid = current_sid;
 
+			if (pi->pgid == current_sid) {
+				pr_err("Current sid %d intersects with pgid of (%d) in images\n", current_sid,
+				       vpid(pi));
+				return -1;
+			}
 			if (pi->pgid == old_sid)
 				pi->pgid = current_sid;
 		}
-
-		if (lookup_create_item(current_sid) == NULL)
-			return -1;
 	}
 
 	/* root_item is a group leader */
 	if (root_item->pgid == vpid(root_item))
-		return 0;
+		goto add_fake_session_leader;
 
 	old_gid = root_item->pgid;
 	if (old_gid != current_gid) {
@@ -406,14 +420,21 @@ static int prepare_pstree_for_shell_job(pid_t pid)
 		}
 
 		for_each_pstree_item(pi) {
+			if (current_gid != current_sid && pi->pgid == current_gid) {
+				pr_err("Current gid %d intersects with pgid of (%d) in images\n", current_gid,
+				       vpid(pi));
+				return -1;
+			}
 			if (pi->pgid == old_gid)
 				pi->pgid = current_gid;
 		}
-
-		if (lookup_create_item(current_gid) == NULL)
-			return -1;
 	}
 
+	if (old_gid != current_gid && !lookup_create_item(current_gid))
+		return -1;
+add_fake_session_leader:
+	if (old_sid != current_sid && !lookup_create_item(current_sid))
+		return -1;
 	return 0;
 }
 
@@ -945,7 +966,7 @@ static int prepare_pstree_kobj_ids(void)
 			 * this namespace is either inherited from the
 			 * criu or is created for the init task (only)
 			 */
-			pr_err("Can't restore sub-task in NS\n");
+			pr_err("Can't restore sub-task in NS (cflags %lx)\n", cflags);
 			return -1;
 		}
 	}
diff --git a/criu/seize.c b/criu/seize.c
index 58564ca74..d0cf7b36c 100644
--- a/criu/seize.c
+++ b/criu/seize.c
@@ -16,6 +16,7 @@
 #include "pstree.h"
 #include "criu-log.h"
 #include <compel/ptrace.h>
+#include "plugin.h"
 #include "proc_parse.h"
 #include "seccomp.h"
 #include "seize.h"
@@ -24,13 +25,72 @@
 #include "xmalloc.h"
 #include "util.h"
 
+static bool compel_interrupt_only_mode;
+
+/*
+ * Disables the use of freeze cgroups for process seizing, even if explicitly
+ * requested via the --freeze-cgroup option or already set in a frozen state.
+ * This is necessary for plugins (e.g., CUDA) that do not function correctly
+ * when processes are frozen using cgroups.
+ */
+void __attribute__((used)) set_compel_interrupt_only_mode(void)
+{
+	compel_interrupt_only_mode = true;
+}
+
+char *task_comm_info(pid_t pid, char *comm, size_t size)
+{
+	bool is_read = false;
+
+	if (!pr_quelled(LOG_INFO)) {
+		int saved_errno = errno;
+		char path[32];
+		int fd;
+
+		snprintf(path, sizeof(path), "/proc/%d/comm", pid);
+		fd = open(path, O_RDONLY);
+		if (fd >= 0) {
+			ssize_t n = read(fd, comm, size);
+			if (n > 0) {
+				is_read = true;
+				/* Replace '\n' printed by kernel with '\0' */
+				comm[n - 1] = '\0';
+			} else {
+				pr_warn("Failed to read %s: %s\n", path, strerror(errno));
+			}
+			close(fd);
+		} else {
+			pr_warn("Failed to open %s: %s\n", path, strerror(errno));
+		}
+		errno = saved_errno;
+	}
+
+	if (!is_read)
+		comm[0] = '\0';
+
+	return comm;
+}
+
+/*
+ * NOTE: Don't run simultaneously, it uses local static buffer!
+ */
+char *__task_comm_info(pid_t pid)
+{
+	static char comm[32];
+
+	return task_comm_info(pid, comm, sizeof(comm));
+}
+
 #define NR_ATTEMPTS 5
 
 static const char frozen[] = "FROZEN";
 static const char freezing[] = "FREEZING";
 static const char thawed[] = "THAWED";
 
-enum freezer_state { FREEZER_ERROR = -1, THAWED, FROZEN, FREEZING };
+enum freezer_state { FREEZER_ERROR = -1,
+		     THAWED,
+		     FROZEN,
+		     FREEZING };
 
 /* Track if we are running on cgroup v2 system. */
 static bool cgroup_v2 = false;
@@ -146,12 +206,12 @@ static int freezer_write_state(int fd, enum freezer_state new_state)
 	if (new_state == THAWED) {
 		if (cgroup_v2)
 			state[0] = '0';
-		else if (strlcpy(state, thawed, sizeof(state)) >= sizeof(state))
+		else if (__strlcpy(state, thawed, sizeof(state)) >= sizeof(state))
 			return -1;
 	} else if (new_state == FROZEN) {
 		if (cgroup_v2)
 			state[0] = '1';
-		else if (strlcpy(state, frozen, sizeof(state)) >= sizeof(state))
+		else if (__strlcpy(state, frozen, sizeof(state)) >= sizeof(state))
 			return -1;
 	} else {
 		return -1;
@@ -249,13 +309,13 @@ static int seize_cgroup_tree(char *root_path, enum freezer_state state)
 		if (ret == 0)
 			continue;
 		if (errno != ESRCH) {
-			pr_perror("Unexpected error");
+			pr_perror("Unexpected error for pid %d (comm %s)", pid, __task_comm_info(pid));
 			fclose(f);
 			return -1;
 		}
 
 		if (!compel_interrupt_task(pid)) {
-			pr_debug("SEIZE %d: success\n", pid);
+			pr_debug("SEIZE %d (comm %s): success\n", pid, __task_comm_info(pid));
 			processes_to_wait++;
 		} else if (state == FROZEN) {
 			char buf[] = "/proc/XXXXXXXXXX/exe";
@@ -272,7 +332,7 @@ static int seize_cgroup_tree(char *root_path, enum freezer_state state)
 			 * before it compete exit procedure. The caller simply
 			 * should wait a bit and try freezing again.
 			 */
-			pr_err("zombie found while seizing\n");
+			pr_err("zombie %d (comm %s) found while seizing\n", pid, __task_comm_info(pid));
 			fclose(f);
 			return -EAGAIN;
 		}
@@ -353,7 +413,7 @@ static int freezer_detach(void)
 {
 	int i;
 
-	if (!opts.freeze_cgroup)
+	if (!opts.freeze_cgroup || compel_interrupt_only_mode)
 		return 0;
 
 	for (i = 0; i < processes_to_wait && processes_to_wait_pids; i++) {
@@ -448,13 +508,73 @@ static int log_unfrozen_stacks(char *root)
 	return 0;
 }
 
+static int prepare_freezer_for_interrupt_only_mode(void)
+{
+	enum freezer_state state = THAWED;
+	int fd;
+	int exit_code = -1;
+
+	BUG_ON(!compel_interrupt_only_mode);
+
+	fd = freezer_open();
+	if (fd < 0)
+		return -1;
+
+	state = get_freezer_state(fd);
+	if (state == FREEZER_ERROR) {
+		goto err;
+	}
+
+	origin_freezer_state = state == FREEZING ? FROZEN : state;
+
+	if (state != THAWED) {
+		pr_warn("unfreezing cgroup for plugin compatibility\n");
+		if (freezer_write_state(fd, THAWED))
+			goto err;
+	}
+
+	exit_code = 0;
+err:
+	close(fd);
+	return exit_code;
+}
+
+static void cgroupv1_freezer_kludges(int fd, int iter, const struct timespec *req) {
+	/* As per older kernel docs (freezer-subsystem.txt before
+	 * the kernel commit ef9fe980c6fcc1821), if FREEZING is seen,
+	 * userspace should either retry or thaw. While current
+	 * kernel cgroup v1 docs no longer mention a need to retry,
+	 * even recent kernels can't reliably freeze a cgroup v1.
+	 *
+	 * Let's keep asking the kernel to freeze from time to time.
+	 * In addition, do occasional thaw/sleep/freeze.
+	 *
+	 * This is still a game of chances (the real fix belongs to the kernel)
+	 * but these kludges might improve the probability of success.
+	 *
+	 * Cgroup v2 does not have this problem.
+	 */
+	switch (iter % 32) {
+		case 9:
+		case 20:
+			freezer_write_state(fd, FROZEN);
+			break;
+		case 31:
+			freezer_write_state(fd, THAWED);
+			nanosleep(req, NULL);
+			freezer_write_state(fd, FROZEN);
+			break;
+	}
+}
+
 static int freeze_processes(void)
 {
 	int fd, exit_code = -1;
 	enum freezer_state state = THAWED;
 
 	static const unsigned long step_ms = 100;
-	unsigned long nr_attempts = (opts.timeout * 1000000) / step_ms;
+	/* Since opts.timeout is in seconds, multiply it by 1000 to convert to milliseconds. */
+	unsigned long nr_attempts = (opts.timeout * 1000) / step_ms;
 	unsigned long i = 0;
 
 	const struct timespec req = {
@@ -463,14 +583,12 @@ static int freeze_processes(void)
 	};
 
 	if (unlikely(!nr_attempts)) {
-		/*
-		 * If timeout is turned off, lets
-		 * wait for at least 10 seconds.
-		 */
-		nr_attempts = (10 * 1000000) / step_ms;
+		/* If the timeout is 0, wait for at least 10 seconds. */
+		nr_attempts = (10 * 1000) / step_ms;
 	}
 
-	pr_debug("freezing processes: %lu attempts with %lu ms steps\n", nr_attempts, step_ms);
+	pr_debug("freezing cgroup %s: %lu x %lums attempts, timeout: %us\n",
+		 opts.freeze_cgroup, nr_attempts, step_ms, opts.timeout);
 
 	fd = freezer_open();
 	if (fd < 0)
@@ -497,22 +615,25 @@ static int freeze_processes(void)
 		 * not read @tasks pids while freezer in
 		 * transition stage.
 		 */
-		for (; i <= nr_attempts; i++) {
+		while (1) {
 			state = get_freezer_state(fd);
 			if (state == FREEZER_ERROR) {
 				close(fd);
 				return -1;
 			}
 
-			if (state == FROZEN)
+			if (state == FROZEN || i++ == nr_attempts || alarm_timeouted())
 				break;
-			if (alarm_timeouted())
-				goto err;
+
+			if (!cgroup_v2)
+				cgroupv1_freezer_kludges(fd, i, &req);
+
 			nanosleep(&req, NULL);
 		}
 
-		if (i > nr_attempts) {
-			pr_err("Unable to freeze cgroup %s\n", opts.freeze_cgroup);
+		if (state != FROZEN) {
+			pr_err("Unable to freeze cgroup %s (%lu x %lums attempts, timeout: %us)\n",
+			       opts.freeze_cgroup, i, step_ms, opts.timeout);
 			if (!pr_quelled(LOG_DEBUG))
 				log_unfrozen_stacks(opts.freeze_cgroup);
 			goto err;
@@ -535,8 +656,10 @@ static int freeze_processes(void)
 	}
 
 err:
-	if (exit_code == 0 || origin_freezer_state == THAWED)
-		exit_code = freezer_write_state(fd, THAWED);
+	if (exit_code == 0 || origin_freezer_state == THAWED) {
+		if (freezer_write_state(fd, THAWED))
+			exit_code = -1;
+	}
 
 	if (close(fd)) {
 		pr_perror("Unable to thaw tasks");
@@ -584,15 +707,18 @@ static int collect_children(struct pstree_item *item)
 			goto free;
 		}
 
-		pr_info("Seized task %d, state %d\n", pid, ret);
-
 		c = alloc_pstree_item();
 		if (c == NULL) {
 			ret = -1;
 			goto free;
 		}
 
-		if (!opts.freeze_cgroup)
+		ret = run_plugins(PAUSE_DEVICES, pid);
+		if (ret < 0 && ret != -ENOTSUP) {
+			goto free;
+		}
+
+		if (!opts.freeze_cgroup || compel_interrupt_only_mode)
 			/* fails when meets a zombie */
 			__ignore_value(compel_interrupt_task(pid));
 
@@ -615,6 +741,11 @@ static int collect_children(struct pstree_item *item)
 		else
 			processes_to_wait--;
 
+		if (ret == TASK_STOPPED)
+			c->pid->stop_signo = compel_parse_stop_signo(pid);
+
+		pr_info("Seized task %d, state %d\n", pid, ret);
+
 		c->pid->real = pid;
 		c->parent = item;
 		c->pid->state = ret;
@@ -646,7 +777,7 @@ static void unseize_task_and_threads(const struct pstree_item *item, int st)
 	 * the item->state is the state task was in when we seized one.
 	 */
 
-	compel_resume_task(item->pid->real, item->pid->state, st);
+	compel_resume_task_sig(item->pid->real, item->pid->state, st, item->pid->stop_signo);
 
 	if (st == TASK_DEAD)
 		return;
@@ -777,7 +908,8 @@ static int collect_threads(struct pstree_item *item)
 
 		pr_info("\tSeizing %d's %d thread\n", item->pid->real, pid);
 
-		if (!opts.freeze_cgroup && compel_interrupt_task(pid))
+		if ((!opts.freeze_cgroup || compel_interrupt_only_mode) &&
+		    compel_interrupt_task(pid))
 			continue;
 
 		ret = compel_wait_task(pid, item_ppid(item), parse_pid_status, NULL, &t_creds.s, NULL);
@@ -833,7 +965,7 @@ static int collect_loop(struct pstree_item *item, int (*collect)(struct pstree_i
 {
 	int attempts = NR_ATTEMPTS, nr_inprogress = 1;
 
-	if (opts.freeze_cgroup)
+	if (opts.freeze_cgroup && !compel_interrupt_only_mode)
 		attempts = 1;
 
 	/*
@@ -876,7 +1008,7 @@ static int collect_task(struct pstree_item *item)
 	if (ret < 0)
 		goto err_close;
 
-	if ((item->pid->state == TASK_DEAD) && !list_empty(&item->children)) {
+	if ((item->pid->state == TASK_DEAD) && has_children(item)) {
 		pr_err("Zombie with children?! O_o Run, run, run!\n");
 		goto err_close;
 	}
@@ -916,7 +1048,7 @@ static int cgroup_version(void)
 int collect_pstree(void)
 {
 	pid_t pid = root_item->pid->real;
-	int ret = -1;
+	int ret, exit_code = -1;
 	struct proc_status_creds creds;
 
 	timing_start(TIME_FREEZING);
@@ -933,12 +1065,31 @@ int collect_pstree(void)
 
 	pr_debug("Detected cgroup V%d freezer\n", cgroup_v2 ? 2 : 1);
 
-	if (opts.freeze_cgroup && freeze_processes())
-		goto err;
+	if (opts.freeze_cgroup && !compel_interrupt_only_mode) {
+		ret = run_plugins(PAUSE_DEVICES, pid);
+		if (ret < 0 && ret != -ENOTSUP) {
+			goto err;
+		}
 
-	if (!opts.freeze_cgroup && compel_interrupt_task(pid)) {
-		set_cr_errno(ESRCH);
-		goto err;
+		if (freeze_processes())
+			goto err;
+	} else {
+		if (opts.freeze_cgroup && prepare_freezer_for_interrupt_only_mode())
+			goto err;
+
+		/*
+		 * Call PAUSE_DEVICES after prepare_freezer_for_interrupt_only_mode()
+		 * to be able to checkpoint containers in a frozen state.
+		 */
+		ret = run_plugins(PAUSE_DEVICES, pid);
+		if (ret < 0 && ret != -ENOTSUP) {
+			goto err;
+		}
+
+		if (compel_interrupt_task(pid)) {
+			set_cr_errno(ESRCH);
+			goto err;
+		}
 	}
 
 	ret = compel_wait_task(pid, -1, parse_pid_status, NULL, &creds.s, NULL);
@@ -950,6 +1101,9 @@ int collect_pstree(void)
 	else
 		processes_to_wait--;
 
+	if (ret == TASK_STOPPED)
+		root_item->pid->stop_signo = compel_parse_stop_signo(pid);
+
 	pr_info("Seized task %d, state %d\n", pid, ret);
 	root_item->pid->state = ret;
 
@@ -961,17 +1115,35 @@ int collect_pstree(void)
 	if (ret < 0)
 		goto err;
 
-	if (opts.freeze_cgroup && freezer_wait_processes()) {
-		ret = -1;
+	if (opts.freeze_cgroup && !compel_interrupt_only_mode &&
+	    freezer_wait_processes()) {
 		goto err;
 	}
 
-	ret = 0;
+	exit_code = 0;
 	timing_stop(TIME_FREEZING);
 	timing_start(TIME_FROZEN);
 
 err:
 	/* Freezing stage finished in time - disable timer. */
 	alarm(0);
-	return ret;
+	return exit_code;
+}
+
+int checkpoint_devices(void)
+{
+	struct pstree_item *iter;
+	int ret, exit_code = -1;
+
+	for_each_pstree_item(iter) {
+		if (!task_alive(iter))
+			continue;
+		ret = run_plugins(CHECKPOINT_DEVICES, iter->pid->real);
+		if (ret < 0 && ret != -ENOTSUP)
+			goto err;
+	}
+
+	exit_code = 0;
+err:
+	return exit_code;
 }
diff --git a/criu/servicefd.c b/criu/servicefd.c
index 06a8d3eba..dfb019066 100644
--- a/criu/servicefd.c
+++ b/criu/servicefd.c
@@ -313,4 +313,4 @@ int clone_service_fd(struct pstree_item *me)
 	ret = 0;
 
 	return ret;
-}
+}
diff --git a/criu/setproctitle.c b/criu/setproctitle.c
new file mode 100644
index 000000000..9e01678d2
--- /dev/null
+++ b/criu/setproctitle.c
@@ -0,0 +1,54 @@
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#ifdef CONFIG_HAS_LIBBSD
+#include <bsd/unistd.h>
+#else
+
+#include "setproctitle.h"
+
+/* Without libbsd there is no setproctitle(); make the call a no-op. */
+#define setproctitle(fmt, ...)
+#endif
+
+/*
+ * setproctitle_init() is in libbsd since v0.6.0. Provide a no-op fallback
+ * whenever it is unavailable: when criu is built without libbsd, and when
+ * it is built against libbsd < 0.6.0. The latter case still takes the
+ * CONFIG_HAS_LIBBSD branch above, so the fallback must not be nested
+ * inside that branch's #else.
+ */
+#if !defined(CONFIG_HAS_LIBBSD) || !defined(CONFIG_HAS_SETPROCTITLE_INIT)
+#define setproctitle_init(argc, argv, envp)
+#endif
+
+/*
+ * Forward to setproctitle_init(); the call expands to nothing when the
+ * no-op fallback above is in effect.
+ */
+void __setproctitle_init(int argc, char *argv[], char *envp[])
+{
+	setproctitle_init(argc, argv, envp);
+}
+
+#ifndef SPT_MAXTITLE
+#define SPT_MAXTITLE 255
+#endif
+
+/*
+ * Format the title into a local buffer (at most SPT_MAXTITLE characters)
+ * and pass it to setproctitle() as the argument of a plain "%s" format.
+ */
+void __setproctitle(const char *fmt, ...)
+{
+	char buf[SPT_MAXTITLE + 1];
+	va_list args;
+
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+
+	setproctitle("%s", buf);
+}
diff --git a/criu/shmem.c b/criu/shmem.c
index 81e701586..bc7aa3669 100644
--- a/criu/shmem.c
+++ b/criu/shmem.c
@@ -206,26 +206,48 @@ static int expand_shmem(struct shmem_info *si, unsigned long new_size)
 	return 0;
 }
 
-static void update_shmem_pmaps(struct shmem_info *si, u64 *map, VmaEntry *vma)
+/*
+ * Walk @vma page by page and record in si->pstate_map the state of each
+ * page for which should_dump_page() reports page_info.next == vaddr:
+ * PST_DIRTY if page_info.softdirty is set, PST_DUMP otherwise.
+ *
+ * Returns 0 (also when shmem tracking is off), -1 if should_dump_page() fails.
+ */
+static int update_shmem_pmaps(struct shmem_info *si, pmc_t *pmc, VmaEntry *vma)
 {
 	unsigned long shmem_pfn, vma_pfn, vma_pgcnt;
+	u64 vaddr;
 
 	if (!is_shmem_tracking_en())
-		return;
+		return 0;
 
 	vma_pgcnt = DIV_ROUND_UP(si->size - vma->pgoff, PAGE_SIZE);
-	for (vma_pfn = 0; vma_pfn < vma_pgcnt; ++vma_pfn) {
-		if (!should_dump_page(vma, map[vma_pfn]))
+	for (vma_pfn = 0, vaddr = vma->start; vma_pfn < vma_pgcnt; ++vma_pfn, vaddr += PAGE_SIZE) {
+		struct page_info page_info = {};
+
+		if (should_dump_page(pmc, vma, vaddr, &page_info))
+			return -1;
+
+		if (page_info.next != vaddr) {
+			/*
+			 * Jump to page_info.next. The loop increment advances both
+			 * counters by one page, so park them one page earlier, and
+			 * move vma_pfn together with vaddr so that shmem_pfn below
+			 * and the loop bound keep describing the same page.
+			 */
+			vaddr = page_info.next - PAGE_SIZE;
+			vma_pfn = (vaddr - vma->start) / PAGE_SIZE;
 			continue;
+		}
 
 		shmem_pfn = vma_pfn + DIV_ROUND_UP(vma->pgoff, PAGE_SIZE);
-		if (map[vma_pfn] & PME_SOFT_DIRTY)
+		if (page_info.softdirty)
 			set_pstate(si->pstate_map, shmem_pfn, PST_DIRTY);
-		else if (page_is_zero(map[vma_pfn]))
-			set_pstate(si->pstate_map, shmem_pfn, PST_ZERO);
 		else
 			set_pstate(si->pstate_map, shmem_pfn, PST_DUMP);
 	}
+
+	return 0;
 }
 
 int collect_sysv_shmem(unsigned long shmid, unsigned long size)
@@ -648,7 +656,7 @@ err:
 	return -1;
 }
 
-int add_shmem_area(pid_t pid, VmaEntry *vma, u64 *map)
+int add_shmem_area(pid_t pid, VmaEntry *vma, pmc_t *pmc)
 {
 	struct shmem_info *si;
 	unsigned long size = vma->pgoff + (vma->end - vma->start);
@@ -662,7 +670,9 @@ int add_shmem_area(pid_t pid, VmaEntry *vma, u64 *map)
 			if (expand_shmem(si, size))
 				return -1;
 		}
-		update_shmem_pmaps(si, map, vma);
+
+		if (update_shmem_pmaps(si, pmc, vma))
+			return -1;
 
 		return 0;
 	}
@@ -679,7 +689,9 @@ int add_shmem_area(pid_t pid, VmaEntry *vma, u64 *map)
 
 	if (expand_shmem(si, size))
 		return -1;
-	update_shmem_pmaps(si, map, vma);
+
+	if (update_shmem_pmaps(si, pmc, vma))
+		return -1;
 
 	return 0;
 }
@@ -750,7 +762,7 @@ static int do_dump_one_shmem(int fd, void *addr, struct shmem_info *si)
 		unsigned long pgaddr;
 		int st = -1;
 
-		if (pfn >= next_hole_pfn && next_data_segment(fd, pfn, &next_data_pnf, &next_hole_pfn))
+		if (fd >= 0 && pfn >= next_hole_pfn && next_data_segment(fd, pfn, &next_data_pnf, &next_hole_pfn))
 			goto err_xfer;
 
 		if (si->pstate_map && is_shmem_tracking_en()) {
@@ -808,24 +820,77 @@ static int dump_one_shmem(struct shmem_info *si)
 {
 	int fd, ret = -1;
 	void *addr;
+	unsigned long cur, remaining;
 
 	pr_info("Dumping shared memory %ld\n", si->shmid);
 
-	fd = open_proc(si->pid, "map_files/%lx-%lx", si->start, si->end);
-	if (fd < 0)
-		goto err;
+	/* Preferred path: map the segment directly through its map_files/ link. */
+	fd = __open_proc(si->pid, EPERM, O_RDONLY, "map_files/%lx-%lx", si->start, si->end);
+	if (fd >= 0) {
+		addr = mmap(NULL, si->size, PROT_READ, MAP_SHARED, fd, 0);
+		if (addr == MAP_FAILED) {
+			pr_perror("Can't map shmem 0x%lx (0x%lx-0x%lx)", si->shmid, si->start, si->end);
+			goto errc;
+		}
+	} else {
+		if (errno != EPERM || !opts.unprivileged) {
+			goto err;
+		}
 
-	addr = mmap(NULL, si->size, PROT_READ, MAP_SHARED, fd, 0);
-	if (addr == MAP_FAILED) {
-		pr_err("Can't map shmem 0x%lx (0x%lx-0x%lx)\n", si->shmid, si->start, si->end);
-		goto errc;
+		pr_debug("Could not access map_files/ link, falling back to /proc/$pid/mem\n");
+
+		fd = open_proc(si->pid, "mem");
+		if (fd < 0) {
+			goto err;
+		}
+
+		/* Private scratch buffer that receives a copy of the segment. */
+		addr = mmap(NULL, si->size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+		if (addr == MAP_FAILED) {
+			pr_perror("Can't map empty space for shmem 0x%lx (0x%lx-0x%lx)", si->shmid, si->start, si->end);
+			goto errc;
+		}
+
+		if (lseek(fd, si->start, SEEK_SET) < 0) {
+			pr_perror("Can't seek virtual memory");
+			goto errm;
+		}
+
+		/*
+		 * Copy si->size bytes, tolerating short reads. The byte count
+		 * is kept in its own variable so that ret stays -1 until
+		 * do_dump_one_shmem() has run; a failed or empty read must
+		 * never be reported as success.
+		 */
+		cur = 0;
+		remaining = si->size;
+		do {
+			ssize_t n = read(fd, addr + cur, remaining);
+
+			if (n < 0) {
+				pr_perror("Can't read virtual memory");
+				goto errm;
+			}
+			if (n == 0) {
+				pr_err("Unexpected EOF while reading virtual memory\n");
+				goto errm;
+			}
+			remaining -= n;
+			cur += n;
+		} while (remaining > 0);
+
+		/* With fd < 0 do_dump_one_shmem() skips next_data_segment(). */
+		close(fd);
+		fd = -1;
 	}
 
 	ret = do_dump_one_shmem(fd, addr, si);
 
+errm:
 	munmap(addr, si->size);
 errc:
-	close(fd);
+	if (fd >= 0)
+		close(fd);
 err:
 	return ret;
 }
diff --git a/criu/sigact.c b/criu/sigact.c
new file mode 100644
index 000000000..5174644d2
--- /dev/null
+++ b/criu/sigact.c
@@ -0,0 +1,319 @@
+#include "types.h"
+#include "infect.h"
+#include "protobuf.h"
+#include "pstree.h"
+#include "parasite.h"
+#include "restorer.h"
+#include "sigact.h"
+
+/*
+ * Sigactions restored so far, indexed by signo - 1. A sigaction that blocks
+ * SIGKILL makes no sense, so the (1 << SIGKILL) bit in an entry's mask is used
+ * as a marker: such an entry is no longer valid and must not be inherited.
+ */
+static rt_sigaction_t parent_act[SIGMAX];
+#ifdef CONFIG_COMPAT
+static rt_sigaction_t_compat parent_act_compat[SIGMAX];
+#endif
+
+static bool sa_inherited(int sig, rt_sigaction_t *sa) /* true when @sa is identical to parent_act[sig] */
+{
+	rt_sigaction_t *pa;
+	int i;
+
+	if (current == root_item)
+		return false; /* XXX -- inherit from CRIU? */
+
+	pa = &parent_act[sig]; /* @sig is a 0-based slot: callers pass signo - 1 */
+
+	/* A cached entry carrying the (1 << SIGKILL) marker bit is not valid: never match it */
+	if (pa->rt_sa_mask.sig[0] & (1 << SIGKILL))
+		return false;
+
+	for (i = 0; i < _KNSIG_WORDS; i++) /* every word of the mask has to match */
+		if (pa->rt_sa_mask.sig[i] != sa->rt_sa_mask.sig[i])
+			return false;
+
+	return pa->rt_sa_handler == sa->rt_sa_handler && pa->rt_sa_flags == sa->rt_sa_flags &&
+	       pa->rt_sa_restorer == sa->rt_sa_restorer;
+}
+
+static void *stack32;
+rt_sigaction_t sigchld_act;
+
+#ifdef CONFIG_COMPAT
+static bool sa_compat_inherited(int sig, rt_sigaction_t_compat *sa) /* compat twin of sa_inherited() */
+{
+	rt_sigaction_t_compat *pa;
+	int i;
+
+	if (current == root_item)
+		return false;
+
+	pa = &parent_act_compat[sig]; /* @sig is a 0-based slot: callers pass signo - 1 */
+
+	/* A cached entry carrying the (1 << SIGKILL) marker bit is not valid: never match it */
+	if (pa->rt_sa_mask.sig[0] & (1 << SIGKILL))
+		return false;
+
+	for (i = 0; i < _KNSIG_WORDS; i++) /* every word of the mask has to match */
+		if (pa->rt_sa_mask.sig[i] != sa->rt_sa_mask.sig[i])
+			return false;
+
+	return pa->rt_sa_handler == sa->rt_sa_handler && pa->rt_sa_flags == sa->rt_sa_flags &&
+	       pa->rt_sa_restorer == sa->rt_sa_restorer;
+}
+
+static int restore_compat_sigaction(int sig, SaEntry *e) /* returns 1 if handled, 0 for SIGCHLD, negative on error */
+{
+	rt_sigaction_t_compat act;
+	int ret;
+
+	ASSIGN_TYPED(act.rt_sa_handler, (u32)e->sigaction);
+	ASSIGN_TYPED(act.rt_sa_flags, e->flags);
+	ASSIGN_TYPED(act.rt_sa_restorer, (u32)e->restorer);
+	BUILD_BUG_ON(sizeof(e->mask) != sizeof(act.rt_sa_mask.sig));
+	memcpy(act.rt_sa_mask.sig, &e->mask, sizeof(act.rt_sa_mask.sig));
+
+	if (sig == SIGCHLD) {
+		memcpy(&sigchld_act, &act, sizeof(rt_sigaction_t_compat));
+		return 0; /* SIGCHLD is only stored in sigchld_act; it is not installed here */
+	}
+
+	if (sa_compat_inherited(sig - 1, &act))
+		return 1; /* identical to the cached compat action: no syscall needed */
+
+	if (!stack32) { /* allocated on first use; prepare_sigactions() frees it */
+		stack32 = alloc_compat_syscall_stack();
+		if (!stack32)
+			return -1;
+	}
+
+	ret = arch_compat_rt_sigaction(stack32, sig, &act);
+	if (ret < 0) {
+		pr_err("Can't restore compat sigaction: %d\n", ret);
+		return ret;
+	}
+
+	parent_act_compat[sig - 1] = act;
+	/* Set the (1 << SIGKILL) marker bit so the native cache entry for this signal is no longer valid */
+	parent_act[sig - 1].rt_sa_mask.sig[0] |= 1 << SIGKILL;
+
+	return 1;
+}
+#else
+static int restore_compat_sigaction(int sig, SaEntry *e) /* stub used when CONFIG_COMPAT is not defined */
+{
+	return -1; /* a compat sigaction in the image always fails in this configuration */
+}
+#endif
+
+static int restore_native_sigaction(int sig, SaEntry *e) /* returns 1 if handled, 0 for SIGCHLD, negative on error */
+{
+	rt_sigaction_t act;
+	int ret;
+
+	ASSIGN_TYPED(act.rt_sa_handler, decode_pointer(e->sigaction));
+	ASSIGN_TYPED(act.rt_sa_flags, e->flags);
+	ASSIGN_TYPED(act.rt_sa_restorer, decode_pointer(e->restorer));
+#ifdef CONFIG_MIPS
+	e->has_mask_extended = 1;
+	BUILD_BUG_ON(sizeof(e->mask) * 2 != sizeof(act.rt_sa_mask.sig)); /* mask is split over two image fields */
+
+	memcpy(&(act.rt_sa_mask.sig[0]), &e->mask, sizeof(act.rt_sa_mask.sig[0]));
+	memcpy(&(act.rt_sa_mask.sig[1]), &e->mask_extended, sizeof(act.rt_sa_mask.sig[1]));
+#else
+	BUILD_BUG_ON(sizeof(e->mask) != sizeof(act.rt_sa_mask.sig));
+	memcpy(act.rt_sa_mask.sig, &e->mask, sizeof(act.rt_sa_mask.sig));
+#endif
+	if (sig == SIGCHLD) {
+		sigchld_act = act;
+		return 0; /* SIGCHLD is only stored in sigchld_act; it is not installed here */
+	}
+
+	if (sa_inherited(sig - 1, &act))
+		return 1; /* identical to the cached native action: no syscall needed */
+
+	/*
+	 * The raw rt_sigaction syscall is used, because the glibc
+	 * sigaction() wrapper overwrites the restorer field.
+	 */
+	ret = syscall(SYS_rt_sigaction, sig, &act, NULL, sizeof(k_rtsigset_t));
+	if (ret < 0) {
+		pr_perror("Can't restore sigaction");
+		return ret;
+	}
+
+	parent_act[sig - 1] = act;
+	/* Set the (1 << SIGKILL) marker bit so the compat cache entry for this signal is no longer valid */
+#ifdef CONFIG_COMPAT
+	parent_act_compat[sig - 1].rt_sa_mask.sig[0] |= 1 << SIGKILL;
+#endif
+
+	return 1;
+}
+
+static int prepare_sigactions_from_core(TaskCoreEntry *tc) /* returns 0 on success, negative on error */
+{
+	int sig, i;
+
+	if (tc->n_sigactions != SIGMAX - 2) { /* one entry per signal except SIGKILL and SIGSTOP */
+		pr_err("Bad number of sigactions in the image (%d, want %d)\n", (int)tc->n_sigactions, SIGMAX - 2);
+		return -1;
+	}
+
+	pr_info("Restore on-core sigactions for %d\n", vpid(current));
+
+	for (sig = 1, i = 0; sig <= SIGMAX; sig++) { /* i walks the entries, sig the signal numbers */
+		int ret;
+		SaEntry *e;
+		bool sigaction_is_compat;
+
+		if (sig == SIGKILL || sig == SIGSTOP)
+			continue; /* no entry is stored for these two, so i is not advanced */
+
+		e = tc->sigactions[i++];
+		sigaction_is_compat = e->has_compat_sigaction && e->compat_sigaction;
+		if (sigaction_is_compat)
+			ret = restore_compat_sigaction(sig, e);
+		else
+			ret = restore_native_sigaction(sig, e);
+
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* Reads one SaEntry from @img and applies it: 1 if handled, 0 for SIGCHLD, negative on failure */
+static int restore_one_sigaction(int sig, struct cr_img *img, int pid)
+{
+	bool sigaction_is_compat;
+	SaEntry *e;
+	int ret = 0;
+
+	BUG_ON(sig == SIGKILL || sig == SIGSTOP);
+
+	ret = pb_read_one_eof(img, &e, PB_SIGACT);
+	if (ret == 0) { /* the image ended before an entry for @sig was found */
+		if (sig != SIGMAX_OLD + 1) { /* backward compatibility */
+			pr_err("Unexpected EOF %d\n", sig);
+			return -1;
+		}
+		pr_warn("This format of sigacts-%d.img is deprecated\n", pid);
+		return -1; /* NOTE(review): only a warning is logged, yet the restore still fails - confirm intended */
+	}
+	if (ret < 0)
+		return ret;
+
+	sigaction_is_compat = e->has_compat_sigaction && e->compat_sigaction;
+	if (sigaction_is_compat)
+		ret = restore_compat_sigaction(sig, e);
+	else
+		ret = restore_native_sigaction(sig, e);
+
+	sa_entry__free_unpacked(e, NULL); /* the entry was unpacked by pb_read_one_eof() above */
+
+	return ret;
+}
+
+static int prepare_sigactions_from_image(void) /* returns 0 on success, negative on error */
+{
+	int pid = vpid(current);
+	struct cr_img *img;
+	int sig, rst = 0;
+	int ret = 0;
+
+	pr_info("Restore sigacts for %d\n", pid);
+
+	img = open_image(CR_FD_SIGACT, O_RSTR, pid);
+	if (!img)
+		return -1;
+
+	for (sig = 1; sig <= SIGMAX; sig++) {
+		if (sig == SIGKILL || sig == SIGSTOP)
+			continue; /* restore_one_sigaction() must not be called for these two */
+
+		ret = restore_one_sigaction(sig, img, pid);
+		if (ret < 0)
+			break;
+		if (ret) /* 1: handled, 0: SIGCHLD, which is only recorded */
+			rst++;
+	}
+
+	pr_info("Restored %d/%d sigacts\n", rst, SIGMAX - 3 /* KILL, STOP and CHLD */);
+
+	close_image(img);
+	return ret < 0 ? ret : 0; /* the per-signal 0/1 result is not a status: report success as 0 */
+}
+
+int prepare_sigactions(CoreEntry *core) /* restores the sigactions of the current task */
+{
+	int ret;
+
+	if (!task_alive(current))
+		return 0; /* nothing is restored for a task that is not alive */
+
+	if (core->tc->n_sigactions != 0) /* entries stored in the core entry are used when present */
+		ret = prepare_sigactions_from_core(core->tc);
+	else
+		ret = prepare_sigactions_from_image();
+
+	if (stack32) { /* release the stack restore_compat_sigaction() may have allocated */
+		free_compat_syscall_stack(stack32);
+		stack32 = NULL;
+	}
+
+	return ret;
+}
+
+int parasite_dump_sigacts_seized(struct parasite_ctl *ctl, struct pstree_item *item) /* 0 on success, negative on error */
+{
+	TaskCoreEntry *tc = item->core[0]->tc;
+	struct parasite_dump_sa_args *args;
+	int ret, sig;
+	SaEntry *sa, **psa;
+
+	args = compel_parasite_args(ctl, struct parasite_dump_sa_args);
+
+	ret = compel_rpc_call_sync(PARASITE_CMD_DUMP_SIGACTS, ctl);
+	if (ret < 0)
+		return ret;
+
+	psa = xmalloc((SIGMAX - 2) * (sizeof(SaEntry *) + sizeof(SaEntry))); /* pointers, then entries */
+	if (!psa)
+		return -1;
+
+	sa = (SaEntry *)(psa + SIGMAX - 2); /* the entries start right after the pointer table */
+
+	tc->n_sigactions = SIGMAX - 2;
+	tc->sigactions = psa;
+
+	for (sig = 1; sig <= SIGMAX; sig++) {
+		int i = sig - 1; /* args->sas[] is indexed by signo - 1 */
+
+		if (sig == SIGSTOP || sig == SIGKILL)
+			continue; /* no entry is emitted for these two */
+
+		sa_entry__init(sa);
+		ASSIGN_TYPED(sa->sigaction, encode_pointer(args->sas[i].rt_sa_handler));
+		ASSIGN_TYPED(sa->flags, args->sas[i].rt_sa_flags);
+		ASSIGN_TYPED(sa->restorer, encode_pointer(args->sas[i].rt_sa_restorer));
+#ifdef CONFIG_MIPS
+		sa->has_mask_extended = 1;
+		BUILD_BUG_ON(sizeof(sa->mask) * 2 != sizeof(args->sas[0].rt_sa_mask.sig));
+		memcpy(&sa->mask, &(args->sas[i].rt_sa_mask.sig[0]), sizeof(sa->mask));
+		memcpy(&sa->mask_extended, &(args->sas[i].rt_sa_mask.sig[1]), sizeof(sa->mask_extended));
+#else
+		BUILD_BUG_ON(sizeof(sa->mask) != sizeof(args->sas[0].rt_sa_mask.sig));
+		memcpy(&sa->mask, args->sas[i].rt_sa_mask.sig, sizeof(sa->mask));
+#endif
+		sa->has_compat_sigaction = true;
+		sa->compat_sigaction = !compel_mode_native(ctl);
+
+		*(psa++) = sa++; /* publish the entry and move both cursors on */
+	}
+
+	return 0;
+}
diff --git a/criu/sk-inet.c b/criu/sk-inet.c
index e52b198c3..422edc656 100644
--- a/criu/sk-inet.c
+++ b/criu/sk-inet.c
@@ -14,6 +14,8 @@
 #include <linux/icmp.h>
 #include <linux/icmpv6.h>
 #include <poll.h>
+#include <linux/in.h>
+#include <linux/in6.h>
 
 #include "../soccr/soccr.h"
 
@@ -42,6 +44,11 @@
 #define PB_ALEN_INET  1
 #define PB_ALEN_INET6 4
 
+/* Definition for older kernels without MPTCP support (e.g. Ubuntu 20.04) */
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
 static LIST_HEAD(inet_ports);
 
 struct inet_port {
@@ -123,9 +130,13 @@ static int can_dump_ipproto(unsigned int ino, int proto, int type)
 	case IPPROTO_TCP:
 	case IPPROTO_UDP:
 	case IPPROTO_UDPLITE:
+	case IPPROTO_ICMP:
+	case IPPROTO_ICMPV6:
 		break;
 	default:
 		pr_err("Unsupported proto %d for socket %x\n", proto, ino);
+		if (proto == IPPROTO_MPTCP)
+			pr_err("For Go programs, consider using \"GODEBUG=multipathtcp=0\" to disable MPTCP\n");
 		return 0;
 	}
 
@@ -388,6 +399,10 @@ static int dump_ip_raw_opts(int sk, int family, int proto, IpOptsRawEntry *r)
 	return ret;
 }
 
+#ifndef IPV6_FREEBIND
+#define IPV6_FREEBIND 78
+#endif
+
 static int dump_ip_opts(int sk, int family, int type, int proto, IpOptsEntry *ioe)
 {
 	int ret = 0;
@@ -398,12 +413,26 @@ static int dump_ip_opts(int sk, int family, int type, int proto, IpOptsEntry *io
 		 * and fetch additional options.
 		 */
 		ret |= dump_ip_raw_opts(sk, family, proto, ioe->raw);
-	} else {
-		/* Due to kernel code we can use SOL_IP instead of SOL_IPV6 */
-		ret |= dump_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
-		ioe->has_freebind = ioe->freebind;
 	}
 
+	if (family == AF_INET6) {
+		if (kdat.has_ipv6_freebind)
+			ret |= dump_opt(sk, SOL_IPV6, IPV6_FREEBIND, &ioe->freebind);
+		else if (type != SOCK_RAW)
+			/* Due to kernel code we can use SOL_IP instead of SOL_IPV6 */
+			ret |= dump_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
+		ret |= dump_opt(sk, SOL_IPV6, IPV6_RECVPKTINFO, &ioe->pktinfo);
+	} else {
+		ret |= dump_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
+		ret |= dump_opt(sk, SOL_IP, IP_PKTINFO, &ioe->pktinfo);
+		ret |= dump_opt(sk, SOL_IP, IP_TOS, &ioe->tos);
+		ret |= dump_opt(sk, SOL_IP, IP_TTL, &ioe->ttl);
+	}
+	ioe->has_freebind = ioe->freebind;
+	ioe->has_pktinfo = !!ioe->pktinfo;
+	ioe->has_tos = !!ioe->tos;
+	ioe->has_ttl = !!ioe->ttl;
+
 	return ret;
 }
 
@@ -434,6 +463,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
 	IpOptsEntry ipopts = IP_OPTS_ENTRY__INIT;
 	IpOptsRawEntry ipopts_raw = IP_OPTS_RAW_ENTRY__INIT;
 	SkOptsEntry skopts = SK_OPTS_ENTRY__INIT;
+	TcpOptsEntry tcpopts = TCP_OPTS_ENTRY__INIT;
 	int ret = -1, err = -1, proto, aux, type;
 
 	ret = do_dump_opt(lfd, SOL_SOCKET, SO_PROTOCOL, &proto, sizeof(proto));
@@ -501,6 +531,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
 	ie.opts = &skopts;
 	ie.ip_opts = &ipopts;
 	ie.ip_opts->raw = &ipopts_raw;
+	ie.tcp_opts = &tcpopts;
 
 	ie.n_src_addr = PB_ALEN_INET;
 	ie.n_dst_addr = PB_ALEN_INET;
@@ -550,7 +581,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
 	if (dump_ip_opts(lfd, family, type, proto, &ipopts))
 		goto err;
 
-	if (dump_socket_opts(lfd, &skopts))
+	if (dump_socket_opts(lfd, family, &skopts))
 		goto err;
 
 	pr_info("Dumping inet socket at %d\n", p->fd);
@@ -561,9 +592,20 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
 
 	switch (proto) {
 	case IPPROTO_TCP:
-		err = (type != SOCK_RAW) ? dump_one_tcp(lfd, sk, &skopts) : 0;
 		if (sk->shutdown)
 			sk_encode_shutdown(&ie, sk->shutdown);
+
+		if (type == SOCK_RAW) {
+			err = 0;
+		} else {
+			err = dump_tcp_opts(lfd, &tcpopts);
+			if (err < 0)
+				goto err;
+
+			err = dump_one_tcp(lfd, sk, &skopts);
+			if (err < 0)
+				goto err;
+		}
 		break;
 	case IPPROTO_UDP:
 	case IPPROTO_UDPLITE:
@@ -787,8 +829,21 @@ int restore_ip_opts(int sk, int family, int proto, IpOptsEntry *ioe)
 {
 	int ret = 0;
 
-	if (ioe->has_freebind)
-		ret |= restore_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
+	if (family == AF_INET6) {
+		if (ioe->has_freebind)
+			ret |= restore_opt(sk, SOL_IPV6, IPV6_FREEBIND, &ioe->freebind);
+		if (ioe->has_pktinfo)
+			ret |= restore_opt(sk, SOL_IPV6, IPV6_RECVPKTINFO, &ioe->pktinfo);
+	} else {
+		if (ioe->has_freebind)
+			ret |= restore_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
+		if (ioe->has_pktinfo)
+			ret |= restore_opt(sk, SOL_IP, IP_PKTINFO, &ioe->pktinfo);
+		if (ioe->has_tos)
+			ret |= restore_opt(sk, SOL_IP, IP_TOS, &ioe->tos);
+		if (ioe->has_ttl)
+			ret |= restore_opt(sk, SOL_IP, IP_TTL, &ioe->ttl);
+	}
 
 	if (ioe->raw)
 		ret |= restore_ip_raw_opts(sk, family, proto, ioe->raw);
@@ -869,8 +924,9 @@ static int open_inet_sk(struct file_desc *d, int *new_fd)
 	}
 
 	if (ie->src_port) {
-		if (inet_bind(sk, ii))
-			goto err;
+		if (ie->proto != IPPROTO_ICMP && ie->proto != IPPROTO_ICMPV6)
+			if (inet_bind(sk, ii))
+				goto err;
 	}
 
 	/*
@@ -906,6 +962,9 @@ done:
 	if (restore_socket_opts(sk, ie->opts))
 		goto err;
 
+	if (ie->proto == IPPROTO_TCP && restore_tcp_opts(sk, ie->tcp_opts))
+		goto err;
+
 	if (ie->has_shutdown &&
 	    (ie->proto == IPPROTO_UDP || ie->proto == IPPROTO_UDPLITE || ie->proto == IPPROTO_TCP)) {
 		if (shutdown(sk, sk_decode_shutdown(ie->shutdown))) {
diff --git a/criu/sk-netlink.c b/criu/sk-netlink.c
index 754eed932..dc2baa1b8 100644
--- a/criu/sk-netlink.c
+++ b/criu/sk-netlink.c
@@ -161,11 +161,11 @@ static int dump_one_netlink_fd(int lfd, u32 id, const struct fd_parms *p)
 
 		ne.protocol = val;
 	}
-
+	ne.flags = p->flags;
 	ne.fown = (FownEntry *)&p->fown;
 	ne.opts = &skopts;
 
-	if (dump_socket_opts(lfd, &skopts))
+	if (dump_socket_opts(lfd, AF_NETLINK, &skopts))
 		goto err;
 
 	fe.type = FD_TYPES__NETLINKSK;
diff --git a/criu/sk-packet.c b/criu/sk-packet.c
index 1d2e23522..6530bff58 100644
--- a/criu/sk-packet.c
+++ b/criu/sk-packet.c
@@ -173,7 +173,7 @@ static int dump_one_packet_fd(int lfd, u32 id, const struct fd_parms *p)
 	psk.fown = (FownEntry *)&p->fown;
 	psk.opts = &skopts;
 
-	if (dump_socket_opts(lfd, &skopts))
+	if (dump_socket_opts(lfd, AF_PACKET, &skopts))
 		return -1;
 
 	psk.protocol = sd->proto;
diff --git a/criu/sk-tcp.c b/criu/sk-tcp.c
index 96d5d13bf..9c8bad1c3 100644
--- a/criu/sk-tcp.c
+++ b/criu/sk-tcp.c
@@ -39,6 +39,8 @@ static int lock_connection(struct inet_sk_desc *sk)
 		return iptables_lock_connection(sk);
 	else if (opts.network_lock_method == NETWORK_LOCK_NFTABLES)
 		return nftables_lock_connection(sk);
+	else if (opts.network_lock_method == NETWORK_LOCK_SKIP)
+		return 0;
 
 	return -1;
 }
@@ -50,6 +52,8 @@ static int unlock_connection(struct inet_sk_desc *sk)
 	else if (opts.network_lock_method == NETWORK_LOCK_NFTABLES)
 		/* All connections will be unlocked in network_unlock(void) */
 		return 0;
+	else if (opts.network_lock_method == NETWORK_LOCK_SKIP)
+		return 0;
 
 	return -1;
 }
@@ -131,7 +135,8 @@ void cpt_unlock_tcp_connections(void)
 static int dump_tcp_conn_state(struct inet_sk_desc *sk)
 {
 	struct libsoccr_sk *socr = sk->priv;
-	int ret, aux;
+	int exit_code = -1;
+	int ret;
 	struct cr_img *img;
 	TcpStreamEntry tse = TCP_STREAM_ENTRY__INIT;
 	char *buf;
@@ -140,11 +145,11 @@ static int dump_tcp_conn_state(struct inet_sk_desc *sk)
 	ret = libsoccr_save(socr, &data, sizeof(data));
 	if (ret < 0) {
 		pr_err("libsoccr_save() failed with %d\n", ret);
-		goto err_r;
+		goto err;
 	}
 	if (ret != sizeof(data)) {
 		pr_err("This libsocr is not supported (%d vs %d)\n", ret, (int)sizeof(data));
-		goto err_r;
+		goto err;
 	}
 
 	sk->state = data.state;
@@ -181,43 +186,22 @@ static int dump_tcp_conn_state(struct inet_sk_desc *sk)
 		tse.rcv_wup = data.rcv_wup;
 	}
 
-	/*
-	 * TCP socket options
-	 */
-
-	if (dump_opt(sk->rfd, SOL_TCP, TCP_NODELAY, &aux))
-		goto err_opt;
-
-	if (aux) {
-		tse.has_nodelay = true;
-		tse.nodelay = true;
-	}
-
-	if (dump_opt(sk->rfd, SOL_TCP, TCP_CORK, &aux))
-		goto err_opt;
-
-	if (aux) {
-		tse.has_cork = true;
-		tse.cork = true;
-	}
-
 	/*
 	 * Push the stuff to image
 	 */
-
 	img = open_image(CR_FD_TCP_STREAM, O_DUMP, sk->sd.ino);
 	if (!img)
-		goto err_img;
+		goto err;
 
 	ret = pb_write_one(img, &tse, PB_TCP_STREAM);
 	if (ret < 0)
-		goto err_iw;
+		goto err_close;
 
 	buf = libsoccr_get_queue_bytes(socr, TCP_RECV_QUEUE, SOCCR_MEM_EXCL);
 	if (buf) {
 		ret = write_img_buf(img, buf, tse.inq_len);
 		if (ret < 0)
-			goto err_iw;
+			goto err_close;
 
 		xfree(buf);
 	}
@@ -226,40 +210,40 @@ static int dump_tcp_conn_state(struct inet_sk_desc *sk)
 	if (buf) {
 		ret = write_img_buf(img, buf, tse.outq_len);
 		if (ret < 0)
-			goto err_iw;
+			goto err_close;
 
 		xfree(buf);
 	}
 
 	pr_info("Done\n");
-err_iw:
+	exit_code = 0;
+err_close:
 	close_image(img);
-err_img:
-err_opt:
-err_r:
+err:
+	return exit_code;
+}
+
+int dump_tcp_opts(int fd, TcpOptsEntry *toe) /* reads the SOL_TCP options of @fd into @toe */
+{
+	int ret = 0; /* OR of all dump_opt() results: non-zero if any of them failed */
+
+	ret |= dump_opt(fd, SOL_TCP, TCP_NODELAY, &toe->nodelay);
+	ret |= dump_opt(fd, SOL_TCP, TCP_CORK, &toe->cork);
+	ret |= dump_opt(fd, SOL_TCP, TCP_KEEPCNT, &toe->keepcnt);
+	ret |= dump_opt(fd, SOL_TCP, TCP_KEEPIDLE, &toe->keepidle);
+	ret |= dump_opt(fd, SOL_TCP, TCP_KEEPINTVL, &toe->keepintvl);
+
+	toe->has_nodelay = !!toe->nodelay; /* a field is flagged as present only when its value is non-zero */
+	toe->has_cork = !!toe->cork;
+	toe->has_keepcnt = !!toe->keepcnt;
+	toe->has_keepidle = !!toe->keepidle;
+	toe->has_keepintvl = !!toe->keepintvl;
+
 	return ret;
 }
 
 int dump_one_tcp(int fd, struct inet_sk_desc *sk, SkOptsEntry *soe)
 {
-	soe->has_tcp_keepcnt = true;
-	if (dump_opt(fd, SOL_TCP, TCP_KEEPCNT, &soe->tcp_keepcnt)) {
-		pr_perror("Can't read TCP_KEEPCNT");
-		return -1;
-	}
-
-	soe->has_tcp_keepidle = true;
-	if (dump_opt(fd, SOL_TCP, TCP_KEEPIDLE, &soe->tcp_keepidle)) {
-		pr_perror("Can't read TCP_KEEPIDLE");
-		return -1;
-	}
-
-	soe->has_tcp_keepintvl = true;
-	if (dump_opt(fd, SOL_TCP, TCP_KEEPINTVL, &soe->tcp_keepintvl)) {
-		pr_perror("Can't read TCP_KEEPINTVL");
-		return -1;
-	}
-
 	if (sk->dst_port == 0)
 		return 0;
 
@@ -393,6 +377,11 @@ static int restore_tcp_conn_state(int sk, struct libsoccr_sk *socr, struct inet_
 	if (libsoccr_restore(socr, &data, sizeof(data)))
 		goto err_c;
 
+	/*
+	 * Restoring TCP socket options in TcpStreamEntry is
+	 * for backward compatibility only, newer versions
+	 * of CRIU use TcpOptsEntry.
+	 */
 	if (tse->has_nodelay && tse->nodelay) {
 		aux = 1;
 		if (restore_opt(sk, SOL_TCP, TCP_NODELAY, &aux))
@@ -445,6 +434,27 @@ int prepare_tcp_socks(struct task_restore_args *ta)
 	return 0;
 }
 
+int restore_tcp_opts(int sk, TcpOptsEntry *toe)
+{
+	int failed = 0; /* OR of every restore_opt() result */
+
+	if (toe == NULL) /* no TCP options were recorded: nothing to apply */
+		return 0;
+
+	if (toe->has_nodelay)
+		failed |= restore_opt(sk, SOL_TCP, TCP_NODELAY, &toe->nodelay);
+	if (toe->has_cork)
+		failed |= restore_opt(sk, SOL_TCP, TCP_CORK, &toe->cork);
+	if (toe->has_keepcnt)
+		failed |= restore_opt(sk, SOL_TCP, TCP_KEEPCNT, &toe->keepcnt);
+	if (toe->has_keepidle)
+		failed |= restore_opt(sk, SOL_TCP, TCP_KEEPIDLE, &toe->keepidle);
+	if (toe->has_keepintvl)
+		failed |= restore_opt(sk, SOL_TCP, TCP_KEEPINTVL, &toe->keepintvl);
+
+	return failed;
+}
+
 int restore_one_tcp(int fd, struct inet_sk_info *ii)
 {
 	struct libsoccr_sk *sk;
@@ -483,6 +493,8 @@ static int unlock_connection_info(struct inet_sk_info *si)
 	else if (opts.network_lock_method == NETWORK_LOCK_NFTABLES)
 		/* All connections will be unlocked in network_unlock(void) */
 		return 0;
+	else if (opts.network_lock_method == NETWORK_LOCK_SKIP)
+		return 0;
 
 	return -1;
 }
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index c6021bc1f..6145fe734 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -221,7 +221,7 @@ int kerndat_socket_unix_file(void)
 	}
 	fd = ioctl(sk, SIOCUNIXFILE);
 	if (fd < 0 && errno != ENOENT) {
-		pr_warn("Unable to open a socket file: %m\n");
+		pr_warn("Unable to open a socket file: %s\n", strerror(errno));
 		kdat.sk_unix_file = false;
 		close(sk);
 		return 0;
@@ -497,12 +497,37 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p)
 			goto err;
 		}
 
+		if (sk->wqlen != 0) {
+			/*
+			 * There's no known way to get data out of the write
+			 * queue of an icon socket. The only good solution for
+			 * now is to fail the migration.
+			 */
+			pr_err("Non-empty write queue on an in-flight socket %#x\n", ue->ino);
+			goto err;
+		}
+
 		ue->peer = e->sk_desc->sd.ino;
 
 		pr_debug("\t\tFixed inflight socket %u peer %u)\n", ue->ino, ue->peer);
+	} else if (ue->state == TCP_LISTEN) {
+		int i;
+
+		for (i = 0; i < sk->nr_icons; i++)
+			if (sk->icons[i] == 0) {
+				/*
+				 * Inode of an icon socket equal to 0 means
+				 * it's already been closed. That means we have
+				 * no simple way to check if it sent any data.
+				 * The only good solution for now is to fail
+				 * the migration.
+				 */
+				pr_err("Found a closed in-flight socket to %#x\n", ue->ino);
+				goto err;
+			}
 	}
 dump:
-	if (dump_socket_opts(lfd, skopts))
+	if (dump_socket_opts(lfd, AF_UNIX, skopts))
 		goto err;
 
 	pr_info("Dumping unix socket at %d\n", p->fd);
@@ -570,14 +595,14 @@ static int unix_resolve_name_old(int lfd, uint32_t id, struct unix_sk_desc *d, U
 	else
 		ns = lookup_ns_by_id(root_item->ids->mnt_ns_id, &mnt_ns_desc);
 	if (!ns) {
-		ret = -ENOENT;
-		goto out;
+		pr_err("Failed to lookup ns by mnt id %d\n", ue->mnt_id);
+		return -1;
 	}
 
 	mntns_root = mntns_get_root_fd(ns);
 	if (mntns_root < 0) {
-		ret = -ENOENT;
-		goto out;
+		pr_err("Failed to lookup mntns root for ns %d\n", ns->id);
+		return -1;
 	}
 
 	if (name[0] != '/') {
@@ -588,15 +613,15 @@ static int unix_resolve_name_old(int lfd, uint32_t id, struct unix_sk_desc *d, U
 
 		ret = resolve_rel_name(id, d, p, &ue->name_dir);
 		if (ret < 0)
-			goto out;
-		goto postprone;
+			return -1;
+		return 0;
 	}
 
 	snprintf(rpath, sizeof(rpath), ".%s", name);
 	if (fstatat(mntns_root, rpath, &st, 0)) {
 		if (errno != ENOENT) {
-			pr_warn("Can't stat socket %#" PRIx32 "(%s), skipping: %m (err %d)\n", id, rpath, errno);
-			goto skip;
+			pr_perror("Can't stat socket %#" PRIx32 "(%s)", id, rpath);
+			return -1;
 		}
 
 		pr_info("unix: Dropping path %s for unlinked sk %#x\n", name, id);
@@ -614,92 +639,77 @@ static int unix_resolve_name_old(int lfd, uint32_t id, struct unix_sk_desc *d, U
 
 	d->deleted = deleted;
 
-postprone:
 	return 0;
-
-out:
-	xfree(name);
-	return ret;
-skip:
-	ret = 1;
-	goto out;
 }
 
 static int unix_resolve_name(int lfd, uint32_t id, struct unix_sk_desc *d, UnixSkEntry *ue, const struct fd_parms *p)
 {
 	char *name = d->name;
-	char path[PATH_MAX], tmp[PATH_MAX];
+	char path[PATH_MAX];
 	struct stat st;
-	int fd, proc_fd, mnt_id, ret;
+	int fd, ret;
+	int exit_code = -1;
 
 	if (d->namelen == 0 || name[0] == '\0')
 		return 0;
 
-	if (kdat.sk_unix_file && (root_ns_mask & CLONE_NEWNS)) {
-		if (get_mnt_id(lfd, &mnt_id))
+	if (!kdat.sk_unix_file) {
+		pr_warn("Trying to resolve unix socket with obsolete method\n");
+		if (unix_resolve_name_old(lfd, id, d, ue, p)) {
+			pr_err("Unable to resolve unix socket name with obsolete method. "
+			       "Try a linux kernel newer than 4.10\n");
 			return -1;
-		ue->mnt_id = mnt_id;
-		ue->has_mnt_id = true;
+		}
+		return 0;
 	}
 
 	fd = ioctl(lfd, SIOCUNIXFILE);
 	if (fd < 0) {
-		pr_warn("Unable to get a socket file descriptor with SIOCUNIXFILE ioctl: %m\n");
-		goto fallback;
+		pr_perror("Unable to get a socket file descriptor with SIOCUNIXFILE ioctl");
+		return -1;
 	}
 
-	ret = fstat(fd, &st);
-	if (ret) {
+	if (root_ns_mask & CLONE_NEWNS) {
+		struct fdinfo_common fdinfo = { .mnt_id = -1 };
+
+		if (parse_fdinfo(fd, FD_TYPES__UND, &fdinfo))
+			goto out;
+
+		ue->mnt_id = fdinfo.mnt_id;
+		ue->has_mnt_id = true;
+	}
+
+	if (fstat(fd, &st)) {
 		pr_perror("Unable to fstat socket fd");
-		return -1;
+		goto out;
 	}
 	d->mode = st.st_mode;
 	d->uid = st.st_uid;
 	d->gid = st.st_gid;
 
-	proc_fd = get_service_fd(PROC_FD_OFF);
-	if (proc_fd < 0) {
-		pr_err("Unable to get service fd for proc\n");
-		return -1;
-	}
-
-	snprintf(tmp, sizeof(tmp), "self/fd/%d", fd);
-	ret = readlinkat(proc_fd, tmp, path, PATH_MAX);
-	if (ret < 0 && ret >= PATH_MAX) {
-		pr_perror("Unable to readlink %s", tmp);
+	ret = read_fd_link(fd, path, sizeof(path));
+	if (ret < 0)
 		goto out;
-	}
-	path[ret] = 0;
 
 	d->deleted = strip_deleted(path, ret);
 
 	if (name[0] != '/') {
-		ret = cut_path_ending(path, name);
-		if (ret) {
-			pr_err("Unable too resolve %s from %s\n", name, path);
+		if (cut_path_ending(path, name)) {
+			pr_err("Unable too cut %s from %s\n", name, path);
 			goto out;
 		}
 
 		ue->name_dir = xstrdup(path);
-		if (!ue->name_dir) {
-			ret = -ENOMEM;
+		if (!ue->name_dir)
 			goto out;
-		}
 
 		pr_debug("Resolved socket relative name %s to %s/%s\n", name, ue->name_dir, name);
 	}
 
-	ret = 0;
+	exit_code = 0;
 out:
 	close(fd);
-	return ret;
-
-fallback:
-	pr_warn("Trying to resolve unix socket with obsolete method\n");
-	ret = unix_resolve_name_old(lfd, id, d, ue, p);
-	if (ret < 0)
-		pr_err("Unable to resolve unix socket name with obsolete method. Try a linux kernel newer than 4.10\n");
-	return ret;
+	return exit_code;
 }
 
 /*
@@ -868,7 +878,8 @@ static int __dump_external_socket(struct unix_sk_desc *sk, struct unix_sk_desc *
 
 	if (peer->type != SOCK_DGRAM) {
 		show_one_unix("Ext stream not supported", peer);
-		pr_err("Can't dump half of stream unix connection.\n");
+		pr_err("Can't dump half of stream unix connection. name: %s; peer name: %s\n",
+		       sk->name, peer->name);
 		return -1;
 	}
 
@@ -1021,8 +1032,8 @@ static struct unix_sk_info *find_queuer_for(int id)
 	struct unix_sk_info *ui;
 
 	list_for_each_entry(ui, &unix_sockets, list) {
-		if (ui->queuer && ui->queuer->ue->id == id)
-			return ui;
+		if (ui->queuer && ui->ue->id == id)
+			return ui->queuer;
 	}
 
 	return NULL;
@@ -1420,32 +1431,32 @@ err_revert_and_exit:
 
+/*
+ * Apply the owner, group and mode saved in ui->ue->file_perms to the
+ * socket file named ui->name. Entries without saved permissions are left
+ * untouched.
+ *
+ * Returns 0 when there is nothing to do, -1 with errno set to E2BIG when
+ * the name does not fit into a PATH_MAX sized buffer, and the result of
+ * cr_fchpermat() otherwise.
+ */
 static int restore_file_perms(struct unix_sk_info *ui)
 {
-	if (ui->ue->file_perms) {
-		FilePermsEntry *perms = ui->ue->file_perms;
-		char fname[PATH_MAX];
+	FilePermsEntry *perms = ui->ue->file_perms;
+	char fname[PATH_MAX];
 
-		if (ui->ue->name.len >= sizeof(fname)) {
-			pr_err("The file name is too long\n");
-			return -E2BIG;
-		}
+	if (!perms)
+		return 0;
 
-		memcpy(fname, ui->name, ui->ue->name.len);
-		fname[ui->ue->name.len] = '\0';
-
-		if (fchownat(AT_FDCWD, fname, perms->uid, perms->gid, 0) < 0) {
-			int errno_cpy = errno;
-			pr_perror("Unable to change file owner and group");
-			return -errno_cpy;
-		}
-
-		if (fchmodat(AT_FDCWD, fname, perms->mode, 0) < 0) {
-			int errno_cpy = errno;
-			pr_perror("Unable to change file mode bits");
-			return -errno_cpy;
-		}
+	if (ui->ue->name.len >= sizeof(fname)) {
+		pr_err("The file name is too long\n");
+		errno = E2BIG;
+		return -1;
 	}
 
-	return 0;
+	/* The name is copied by length, so terminate the copy explicitly. */
+	memcpy(fname, ui->name, ui->ue->name.len);
+	fname[ui->ue->name.len] = '\0';
+
+	return cr_fchpermat(AT_FDCWD, fname, perms->uid, perms->gid, perms->mode, 0);
 }
 
 static int keep_deleted(struct unix_sk_info *ui)
diff --git a/criu/sockets.c b/criu/sockets.c
index db772707b..e4adae03c 100644
--- a/criu/sockets.c
+++ b/criu/sockets.c
@@ -29,6 +29,7 @@
 #include "pstree.h"
 #include "util.h"
 #include "fdstore.h"
+#include "cr_options.h"
 
 #undef LOG_PREFIX
 #define LOG_PREFIX "sockets: "
@@ -37,7 +38,7 @@
 #define SOCK_DIAG_BY_FAMILY 20
 #endif
 
-#define SK_HASH_SIZE 32
+#define SK_HASH_SIZE (1 << 14)
 
 #ifndef SO_GET_FILTER
 #define SO_GET_FILTER SO_ATTACH_FILTER
@@ -64,7 +65,7 @@ const char *socket_proto_name(unsigned int proto, char *nm, size_t size)
 		[IPPROTO_IPV6] = __stringify_1(IPPROTO_IPV6), [IPPROTO_RSVP] = __stringify_1(IPPROTO_RSVP),
 		[IPPROTO_GRE] = __stringify_1(IPPROTO_GRE),   [IPPROTO_ESP] = __stringify_1(IPPROTO_ESP),
 		[IPPROTO_AH] = __stringify_1(IPPROTO_AH),     [IPPROTO_UDPLITE] = __stringify_1(IPPROTO_UDPLITE),
-		[IPPROTO_RAW] = __stringify_1(IPPROTO_RAW),
+		[IPPROTO_RAW] = __stringify_1(IPPROTO_RAW),   [IPPROTO_ICMPV6] = __stringify_1(IPPROTO_ICMPV6),
 	};
 	return __socket_const_name(nm, size, protos, ARRAY_SIZE(protos), proto);
 }
@@ -130,10 +131,12 @@ enum socket_cl_bits {
 	INET_UDP_CL_BIT,
 	INET_UDPLITE_CL_BIT,
 	INET_RAW_CL_BIT,
+	INET_ICMP_CL_BIT,
 	INET6_TCP_CL_BIT,
 	INET6_UDP_CL_BIT,
 	INET6_UDPLITE_CL_BIT,
 	INET6_RAW_CL_BIT,
+	INET6_ICMP_CL_BIT,
 	UNIX_CL_BIT,
 	PACKET_CL_BIT,
 	_MAX_CL_BIT,
@@ -160,6 +163,8 @@ static inline enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsign
 			return INET_UDPLITE_CL_BIT;
 		if (proto == IPPROTO_RAW)
 			return INET_RAW_CL_BIT;
+		if (proto == IPPROTO_ICMP)
+			return INET_ICMP_CL_BIT;
 	}
 	if (family == AF_INET6) {
 		if (proto == IPPROTO_TCP)
@@ -170,6 +175,8 @@ static inline enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsign
 			return INET6_UDPLITE_CL_BIT;
 		if (proto == IPPROTO_RAW)
 			return INET6_RAW_CL_BIT;
+		if (proto == IPPROTO_ICMPV6)
+			return INET6_ICMP_CL_BIT;
 	}
 
 	pr_err("Unknown pair family %d proto %d\n", family, proto);
@@ -281,6 +288,12 @@ void preload_socket_modules(void)
 	req.r.i.sdiag_protocol = IPPROTO_RAW;
 	probe_diag(nl, &req, -ENOENT);
 
+	req.r.i.sdiag_protocol = IPPROTO_ICMP;
+	probe_diag(nl, &req, -ENOENT);
+
+	req.r.i.sdiag_protocol = IPPROTO_ICMPV6;
+	probe_diag(nl, &req, -ENOENT);
+
 	close(nl);
 	pr_info("Done probing\n");
 }
@@ -465,18 +478,33 @@ int do_restore_opt(int sk, int level, int name, void *val, int len)
 	return 0;
 }
 
-static int sk_setbufs(void *arg, int fd, pid_t pid)
+int sk_setbufs(int sk, uint32_t *bufs)
 {
-	u32 *buf = (u32 *)arg;
+	uint32_t sndbuf = bufs[0], rcvbuf = bufs[1];
 
-	if (restore_opt(fd, SOL_SOCKET, SO_SNDBUFFORCE, &buf[0]))
-		return -1;
-	if (restore_opt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &buf[1]))
-		return -1;
+	if (setsockopt(sk, SOL_SOCKET, SO_SNDBUFFORCE, &sndbuf, sizeof(sndbuf)) ||
+	    setsockopt(sk, SOL_SOCKET, SO_RCVBUFFORCE, &rcvbuf, sizeof(rcvbuf))) {
+		if (opts.unprivileged) {
+			pr_info("Unable to set SO_SNDBUFFORCE/SO_RCVBUFFORCE, falling back to SO_SNDBUF/SO_RCVBUF\n");
+			if (setsockopt(sk, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(sndbuf)) ||
+			    setsockopt(sk, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf))) {
+				pr_perror("Unable to set socket SO_SNDBUF/SO_RCVBUF");
+				return -1;
+			}
+		} else {
+			pr_perror("Unable to set socket SO_SNDBUFFORCE/SO_RCVBUFFORCE");
+			return -1;
+		}
+	}
 
 	return 0;
 }
 
+static int sk_setbufs_ns(void *arg, int fd, pid_t pid)
+{
+	return sk_setbufs(fd, (uint32_t *)arg);
+}
+
 /*
  * Set sizes of buffers to maximum and prevent blocking
  * Caller of this fn should call other socket restoring
@@ -489,7 +517,7 @@ int restore_prepare_socket(int sk)
 	/* In kernel a bufsize has type int and a value is doubled. */
 	u32 maxbuf[2] = { INT_MAX / 2, INT_MAX / 2 };
 
-	if (userns_call(sk_setbufs, 0, maxbuf, sizeof(maxbuf), sk))
+	if (userns_call(sk_setbufs_ns, 0, maxbuf, sizeof(maxbuf), sk))
 		return -1;
 
 	/* Prevent blocking on restore */
@@ -517,7 +545,7 @@ int restore_socket_opts(int sk, SkOptsEntry *soe)
 	pr_info("%d restore sndbuf %d rcv buf %d\n", sk, soe->so_sndbuf, soe->so_rcvbuf);
 
 	/* setsockopt() multiplies the input values by 2 */
-	ret |= userns_call(sk_setbufs, 0, bufs, sizeof(bufs), sk);
+	ret |= userns_call(sk_setbufs_ns, 0, bufs, sizeof(bufs), sk);
 
 	if (soe->has_so_buf_lock) {
 		pr_debug("\trestore buf_lock %d for socket\n", soe->so_buf_lock);
@@ -569,6 +597,12 @@ int restore_socket_opts(int sk, SkOptsEntry *soe)
 		pr_debug("\tset keepalive for socket\n");
 		ret |= restore_opt(sk, SOL_SOCKET, SO_KEEPALIVE, &val);
 	}
+
+	/*
+	 * Restoring TCP socket options in SkOptsEntry is
+	 * for backward compatibility only, newer versions
+	 * of CRIU use TcpOptsEntry.
+	 */
 	if (soe->has_tcp_keepcnt) {
 		pr_debug("\tset keepcnt for socket\n");
 		ret |= restore_opt(sk, SOL_TCP, TCP_KEEPCNT, &soe->tcp_keepcnt);
@@ -615,7 +649,7 @@ int do_dump_opt(int sk, int level, int name, void *val, int len)
 	return 0;
 }
 
-int dump_socket_opts(int sk, SkOptsEntry *soe)
+int dump_socket_opts(int sk, int family, SkOptsEntry *soe)
 {
 	int ret = 0, val;
 	struct timeval tv;
@@ -631,8 +665,12 @@ int dump_socket_opts(int sk, SkOptsEntry *soe)
 	ret |= dump_opt(sk, SOL_SOCKET, SO_PRIORITY, &soe->so_priority);
 	soe->has_so_rcvlowat = true;
 	ret |= dump_opt(sk, SOL_SOCKET, SO_RCVLOWAT, &soe->so_rcvlowat);
-	soe->has_so_mark = true;
+	/*
+	 * Restoring SO_MARK requires root or CAP_NET_ADMIN. So that unprivileged
+	 * restore can work, only save it if it differs from the default (zero).
+	 */
 	ret |= dump_opt(sk, SOL_SOCKET, SO_MARK, &soe->so_mark);
+	soe->has_so_mark = !!soe->so_mark;
 
 	ret |= dump_opt(sk, SOL_SOCKET, SO_SNDTIMEO, &tv);
 	soe->so_snd_tmo_sec = tv.tv_sec;
@@ -650,13 +688,15 @@ int dump_socket_opts(int sk, SkOptsEntry *soe)
 	soe->so_reuseport = val ? true : false;
 	soe->has_so_reuseport = true;
 
-	ret |= dump_opt(sk, SOL_SOCKET, SO_PASSCRED, &val);
-	soe->has_so_passcred = true;
-	soe->so_passcred = val ? true : false;
+	if (family == AF_UNIX || family == AF_NETLINK) {
+		ret |= dump_opt(sk, SOL_SOCKET, SO_PASSCRED, &val);
+		soe->has_so_passcred = true;
+		soe->so_passcred = val ? true : false;
 
-	ret |= dump_opt(sk, SOL_SOCKET, SO_PASSSEC, &val);
-	soe->has_so_passsec = true;
-	soe->so_passsec = val ? true : false;
+		ret |= dump_opt(sk, SOL_SOCKET, SO_PASSSEC, &val);
+		soe->has_so_passsec = true;
+		soe->so_passsec = val ? true : false;
+	}
 
 	ret |= dump_opt(sk, SOL_SOCKET, SO_DONTROUTE, &val);
 	soe->has_so_dontroute = true;
@@ -747,6 +787,10 @@ static int inet_receive_one(struct nlmsghdr *h, struct ns_id *ns, void *arg)
 	case IPPROTO_RAW:
 		type = SOCK_RAW;
 		break;
+	case IPPROTO_ICMP:
+	case IPPROTO_ICMPV6:
+		type = SOCK_DGRAM;
+		break;
 	default:
 		BUG_ON(1);
 		return -1;
@@ -771,7 +815,7 @@ static int collect_err(int err, struct ns_id *ns, void *arg)
 	char family[32], proto[32];
 	char msg[256];
 
-	snprintf(msg, sizeof(msg), "Sockects collect procedure family %s proto %s",
+	snprintf(msg, sizeof(msg), "Sockets collect procedure family %s proto %s",
 		 socket_family_name(gr->family, family, sizeof(family)),
 		 socket_proto_name(gr->protocol, proto, sizeof(proto)));
 
@@ -879,6 +923,13 @@ int collect_sockets(struct ns_id *ns)
 	if (tmp)
 		err = tmp;
 
+	/* Collect IPv4 ICMP sockets */
+	req.r.i.sdiag_family = AF_INET;
+	req.r.i.sdiag_protocol = IPPROTO_ICMP;
+	req.r.i.idiag_ext = 0;
+	req.r.i.idiag_states = -1; /* All */
+	set_collect_bit(req.r.n.sdiag_family, req.r.n.sdiag_protocol);
+
 	/* Collect IPv6 TCP sockets */
 	req.r.i.sdiag_family = AF_INET6;
 	req.r.i.sdiag_protocol = IPPROTO_TCP;
@@ -918,6 +969,13 @@ int collect_sockets(struct ns_id *ns)
 	if (tmp)
 		err = tmp;
 
+	/* Collect IPv6 ICMP sockets */
+	req.r.i.sdiag_family = AF_INET6;
+	req.r.i.sdiag_protocol = IPPROTO_ICMPV6;
+	req.r.i.idiag_ext = 0;
+	req.r.i.idiag_states = -1; /* All */
+	set_collect_bit(req.r.n.sdiag_family, req.r.n.sdiag_protocol);
+
 	req.r.p.sdiag_family = AF_PACKET;
 	req.r.p.sdiag_protocol = 0;
 	req.r.p.pdiag_show = PACKET_SHOW_INFO | PACKET_SHOW_MCLIST | PACKET_SHOW_FANOUT | PACKET_SHOW_RING_CFG;
diff --git a/criu/string.c b/criu/string.c
index 7df0b3e09..7edd35363 100644
--- a/criu/string.c
+++ b/criu/string.c
@@ -6,7 +6,6 @@
 
 #include "string.h"
 
-#ifndef CONFIG_HAS_STRLCPY
 /**
  * strlcpy - Copy a %NUL terminated string into a sized buffer
  * @dest: Where to copy the string to
@@ -18,7 +17,7 @@
  * of course, the buffer size is zero). It does not pad
  * out the result like strncpy() does.
  */
-size_t strlcpy(char *dest, const char *src, size_t size)
+size_t __strlcpy(char *dest, const char *src, size_t size)
 {
 	size_t ret = strlen(src);
 
@@ -29,16 +28,14 @@ size_t strlcpy(char *dest, const char *src, size_t size)
 	}
 	return ret;
 }
-#endif
 
-#ifndef CONFIG_HAS_STRLCAT
 /**
  * strlcat - Append a length-limited, %NUL-terminated string to another
  * @dest: The string to be appended to
  * @src: The string to append to it
  * @count: The size of the destination buffer.
  */
-size_t strlcat(char *dest, const char *src, size_t count)
+size_t __strlcat(char *dest, const char *src, size_t count)
 {
 	size_t dsize = strlen(dest);
 	size_t len = strlen(src);
@@ -57,4 +54,3 @@ size_t strlcat(char *dest, const char *src, size_t count)
 	dest[len] = 0;
 	return res;
 }
-#endif
diff --git a/criu/sysctl.c b/criu/sysctl.c
index b06688712..99026acf4 100644
--- a/criu/sysctl.c
+++ b/criu/sysctl.c
@@ -203,6 +203,17 @@ static int __userns_sysctl_op(void *arg, int proc_fd, pid_t pid)
 	 * 2. forks a task
 	 * 3. setns()es to the UTS/IPC namespace of the caller
 	 * 4. write()s to the files and exits
+	 *
+	 * For the IPC namespace, since
+	 * https://github.com/torvalds/linux/commit/5563cabdde, a user with
+	 * enough capabilities can open IPC sysctl files and write to them. The
+	 * later commits https://github.com/torvalds/linux/commit/1f5c135ee5 and
+	 * https://github.com/torvalds/linux/commit/0889f44e28 bind the IPC
+	 * namespace at open() time, so the changed value does not depend on
+	 * the IPC namespace at write() time. The permission check also changed
+	 * slightly, which makes the above approach unusable, but we can simply
+	 * use the nonuserns version for restoring IPC sysctls, as the restored
+	 * process currently has enough capabilities.
 	 */
 	dir = open("/proc/sys", O_RDONLY, O_DIRECTORY);
 	if (dir < 0) {
@@ -335,9 +346,12 @@ out:
 	return ret;
 }
 
-static int __nonuserns_sysctl_op(struct sysctl_req *req, size_t nr_req, int op)
+/* exit_code = 1 in case nonuserns failed but we want to fall back to the userns approach */
+static int __nonuserns_sysctl_op(struct sysctl_req **orig_req, size_t *orig_nr_req, int op)
 {
 	int ret, exit_code = -1;
+	struct sysctl_req *req = *orig_req;
+	size_t nr_req = *orig_nr_req;
 
 	while (nr_req--) {
 		int fd;
@@ -351,6 +365,14 @@ static int __nonuserns_sysctl_op(struct sysctl_req *req, size_t nr_req, int op)
 				req++;
 				continue;
 			}
+			if (errno == EACCES && (req->flags & CTL_FLAGS_IPC_EACCES_SKIP)) {
+				/* The remaining requests are restored using userns approach */
+				*orig_req = req;
+				*orig_nr_req = nr_req + 1;
+				exit_code = 1;
+				goto out;
+			}
+
 			pr_perror("Can't open sysctl %s", req->name);
 			goto out;
 		}
@@ -404,7 +426,16 @@ int sysctl_op(struct sysctl_req *req, size_t nr_req, int op, unsigned int ns)
 	 * so we can do those in process as well.
 	 */
 	if (!ns || ns & CLONE_NEWNET || op == CTL_READ)
-		return __nonuserns_sysctl_op(req, nr_req, op);
+		return __nonuserns_sysctl_op(&req, &nr_req, op);
+
+	/* Try to use nonuserns for restoring IPC sysctls and fall back to
+	 * the userns approach when the returned code is 1.
+	 */
+	if (ns & CLONE_NEWIPC && op == CTL_WRITE) {
+		ret = __nonuserns_sysctl_op(&req, &nr_req, op);
+		if (ret <= 0)
+			return ret;
+	}
 
 	/*
 	 * In order to avoid lots of opening of /proc/sys for each struct sysctl_req,
diff --git a/criu/timens.c b/criu/timens.c
index 5803fc359..257782e5a 100644
--- a/criu/timens.c
+++ b/criu/timens.c
@@ -5,6 +5,7 @@
 #include "proc_parse.h"
 #include "namespaces.h"
 #include "timens.h"
+#include "cr_options.h"
 
 #include "protobuf.h"
 #include "images/timens.pb-c.h"
@@ -57,6 +58,9 @@ int prepare_timens(int id)
 	struct timespec ts;
 	struct timespec prev_moff = {}, prev_boff = {};
 
+	if (opts.unprivileged)
+		return 0;
+
 	img = open_image(CR_FD_TIMENS, O_RSTR, id);
 	if (!img)
 		return -1;
@@ -92,8 +96,8 @@ int prepare_timens(int id)
 	ts.tv_nsec = te->monotonic->tv_nsec - ts.tv_nsec;
 	normalize_timespec(&ts);
 
-	pr_debug("timens: monotonic %ld %ld\n", ts.tv_sec, ts.tv_nsec);
-	if (dprintf(fd, "%d %ld %ld\n", CLOCK_MONOTONIC, ts.tv_sec, ts.tv_nsec) < 0) {
+	pr_debug("timens: monotonic %" PRId64 " %ld\n", (int64_t)ts.tv_sec, ts.tv_nsec);
+	if (dprintf(fd, "%d %" PRId64 " %ld\n", CLOCK_MONOTONIC, (int64_t)ts.tv_sec, ts.tv_nsec) < 0) {
 		pr_perror("Unable to set a monotonic clock offset");
 		goto err;
 	}
@@ -107,8 +111,8 @@ int prepare_timens(int id)
 	ts.tv_nsec = te->boottime->tv_nsec - ts.tv_nsec;
 	normalize_timespec(&ts);
 
-	pr_debug("timens: boottime %ld %ld\n", ts.tv_sec, ts.tv_nsec);
-	if (dprintf(fd, "%d %ld %ld\n", CLOCK_BOOTTIME, ts.tv_sec, ts.tv_nsec) < 0) {
+	pr_debug("timens: boottime %" PRId64 " %ld\n", (int64_t)ts.tv_sec, ts.tv_nsec);
+	if (dprintf(fd, "%d %" PRId64 " %ld\n", CLOCK_BOOTTIME, (int64_t)ts.tv_sec, ts.tv_nsec) < 0) {
 		pr_perror("Unable to set a boottime clock offset");
 		goto err;
 	}
diff --git a/criu/timer.c b/criu/timer.c
new file mode 100644
index 000000000..856501be6
--- /dev/null
+++ b/criu/timer.c
@@ -0,0 +1,402 @@
+#include "types.h"
+#include "crtools.h"
+#include "infect.h"
+#include "protobuf.h"
+#include "pstree.h"
+#include "posix-timer.h"
+#include "parasite.h"
+#include "namespaces.h"
+#include "rst-malloc.h"
+#include "restorer.h"
+
+static inline int timeval_valid(struct timeval *tv)
+{
+	return (tv->tv_sec >= 0) && ((unsigned long)tv->tv_usec < USEC_PER_SEC);
+}
+
+static inline int decode_itimer(char *n, ItimerEntry *ie, struct itimerval *val)
+{
+	if (ie->isec == 0 && ie->iusec == 0 && ie->vsec == 0 && ie->vusec == 0) {
+		memzero_p(val);
+		return 0;
+	}
+
+	val->it_interval.tv_sec = ie->isec;
+	val->it_interval.tv_usec = ie->iusec;
+
+	if (!timeval_valid(&val->it_interval)) {
+		pr_err("Invalid timer interval\n");
+		return -1;
+	}
+
+	if (ie->vsec == 0 && ie->vusec == 0) {
+		/*
+		 * Remaining time was too short. Set it to
+		 * interval to make the timer armed and work.
+		 */
+		val->it_value.tv_sec = ie->isec;
+		val->it_value.tv_usec = ie->iusec;
+	} else {
+		val->it_value.tv_sec = ie->vsec;
+		val->it_value.tv_usec = ie->vusec;
+	}
+
+	if (!timeval_valid(&val->it_value)) {
+		pr_err("Invalid timer value\n");
+		return -1;
+	}
+
+	pr_info("Restored %s timer to %" PRId64 ".%" PRId64 " -> %" PRId64 ".%" PRId64 "\n", n,
+		(int64_t)val->it_value.tv_sec, (int64_t)val->it_value.tv_usec,
+		(int64_t)val->it_interval.tv_sec, (int64_t)val->it_interval.tv_usec);
+
+	return 0;
+}
+
+/*
+ * Legacy itimers restore from CR_FD_ITIMERS
+ */
+
+int prepare_itimers_from_fd(int pid, struct task_restore_args *args)
+{
+	int ret = -1;
+	struct cr_img *img;
+	ItimerEntry *ie;
+
+	if (!deprecated_ok("Itimers"))
+		return -1;
+
+	img = open_image(CR_FD_ITIMERS, O_RSTR, pid);
+	if (!img)
+		return -1;
+
+	ret = pb_read_one(img, &ie, PB_ITIMER);
+	if (ret < 0)
+		goto out;
+	ret = decode_itimer("real", ie, &args->itimers[0]);
+	itimer_entry__free_unpacked(ie, NULL);
+	if (ret < 0)
+		goto out;
+
+	ret = pb_read_one(img, &ie, PB_ITIMER);
+	if (ret < 0)
+		goto out;
+	ret = decode_itimer("virt", ie, &args->itimers[1]);
+	itimer_entry__free_unpacked(ie, NULL);
+	if (ret < 0)
+		goto out;
+
+	ret = pb_read_one(img, &ie, PB_ITIMER);
+	if (ret < 0)
+		goto out;
+	ret = decode_itimer("prof", ie, &args->itimers[2]);
+	itimer_entry__free_unpacked(ie, NULL);
+	if (ret < 0)
+		goto out;
+out:
+	close_image(img);
+	return ret;
+}
+
+/* Fill args->itimers from the core image, or from the legacy image if it has no timers */
+int prepare_itimers(int pid, struct task_restore_args *args, CoreEntry *core)
+{
+	TaskTimersEntry *timers = core->tc->timers;
+	int err;
+
+	if (timers == NULL)
+		return prepare_itimers_from_fd(pid, args);
+
+	err = decode_itimer("real", timers->real, &args->itimers[0]);
+	err |= decode_itimer("virt", timers->virt, &args->itimers[1]);
+	err |= decode_itimer("prof", timers->prof, &args->itimers[2]);
+	return err;
+}
+
+static inline int timespec_valid(struct timespec *ts)
+{
+	return (ts->tv_sec >= 0) && ((unsigned long)ts->tv_nsec < NSEC_PER_SEC);
+}
+
+static inline int decode_posix_timer(PosixTimerEntry *pte, struct restore_posix_timer *pt)
+{
+	pt->val.it_interval.tv_sec = pte->isec;
+	pt->val.it_interval.tv_nsec = pte->insec;
+
+	if (!timespec_valid(&pt->val.it_interval)) {
+		pr_err("Invalid timer interval(posix)\n");
+		return -1;
+	}
+
+	if (pte->vsec == 0 && pte->vnsec == 0) {
+		/*
+		 * Remaining time was too short. Set it to
+		 * interval to make the timer armed and work.
+		 */
+		pt->val.it_value.tv_sec = pte->isec;
+		pt->val.it_value.tv_nsec = pte->insec;
+	} else {
+		pt->val.it_value.tv_sec = pte->vsec;
+		pt->val.it_value.tv_nsec = pte->vnsec;
+	}
+
+	if (!timespec_valid(&pt->val.it_value)) {
+		pr_err("Invalid timer value(posix)\n");
+		return -1;
+	}
+
+	pt->spt.it_id = pte->it_id;
+	pt->spt.clock_id = pte->clock_id;
+	pt->spt.si_signo = pte->si_signo;
+	pt->spt.it_sigev_notify = pte->it_sigev_notify;
+	pt->spt.sival_ptr = decode_pointer(pte->sival_ptr);
+	pt->spt.notify_thread_id = pte->notify_thread_id;
+	pt->overrun = pte->overrun;
+
+	return 0;
+}
+
+static int cmp_posix_timer_proc_id(const void *p1, const void *p2)
+{
+	return ((struct restore_posix_timer *)p1)->spt.it_id - ((struct restore_posix_timer *)p2)->spt.it_id;
+}
+
+static void sort_posix_timers(struct task_restore_args *ta)
+{
+	void *tmem;
+
+	/*
+	 * This is required for restorer's create_posix_timers(),
+	 * it will probe them one-by-one for the desired ID, since
+	 * kernel doesn't provide another API for timer creation
+	 * with given ID.
+	 */
+
+	if (ta->posix_timers_n > 0) {
+		tmem = rst_mem_remap_ptr((unsigned long)ta->posix_timers, RM_PRIVATE);
+		qsort(tmem, ta->posix_timers_n, sizeof(struct restore_posix_timer), cmp_posix_timer_proc_id);
+	}
+}
+
+/*
+ * Legacy posix timers restoration from CR_FD_POSIX_TIMERS.
+ *
+ * Reads every PosixTimerEntry from the per-pid image and decodes it into
+ * rst memory (RM_PRIVATE). Returns 0 on success, negative on error.
+ */
+int prepare_posix_timers_from_fd(int pid, struct task_restore_args *ta)
+{
+	struct cr_img *img;
+	int ret = -1;
+	struct restore_posix_timer *t;
+
+	if (!deprecated_ok("Posix timers"))
+		return -1;
+
+	img = open_image(CR_FD_POSIX_TIMERS, O_RSTR, pid);
+	if (!img)
+		return -1;
+
+	ta->posix_timer_cr_ids = kdat.has_timer_cr_ids;
+	ta->posix_timers_n = 0;
+	while (1) {
+		PosixTimerEntry *pte;
+
+		ret = pb_read_one_eof(img, &pte, PB_POSIX_TIMER);
+		if (ret <= 0)
+			break;
+
+		/* ret is positive here, so an allocation failure must reset it */
+		t = rst_mem_alloc(sizeof(struct restore_posix_timer), RM_PRIVATE);
+		ret = t ? decode_posix_timer(pte, t) : -1;
+		/* The unpacked entry is not needed anymore, even on failure */
+		posix_timer_entry__free_unpacked(pte, NULL);
+		if (ret < 0)
+			break;
+
+		ta->posix_timers_n++;
+	}
+
+	close_image(img);
+	if (!ret)
+		sort_posix_timers(ta);
+	return ret;
+}
+
+/*
+ * Decode the posix timers stored in the core image into rst memory
+ * (RM_PRIVATE), or fall back to the legacy per-pid image when the core
+ * image carries no timers entry. Returns 0 on success.
+ */
+int prepare_posix_timers(int pid, struct task_restore_args *ta, CoreEntry *core)
+{
+	TaskTimersEntry *timers = core->tc->timers;
+	struct restore_posix_timer *slot;
+	int idx;
+
+	ta->posix_timers = (struct restore_posix_timer *)rst_mem_align_cpos(RM_PRIVATE);
+	if (timers == NULL)
+		return prepare_posix_timers_from_fd(pid, ta);
+
+	ta->posix_timers_n = timers->n_posix;
+	ta->posix_timer_cr_ids = kdat.has_timer_cr_ids;
+	for (idx = 0; idx < ta->posix_timers_n; idx++) {
+		slot = rst_mem_alloc(sizeof(struct restore_posix_timer), RM_PRIVATE);
+		if (!slot || decode_posix_timer(timers->posix[idx], slot))
+			return -1;
+	}
+
+	/* The timers are sorted once all of them have been decoded */
+	sort_posix_timers(ta);
+	return 0;
+}
+
+static void encode_itimer(struct itimerval *v, ItimerEntry *ie)
+{
+	ie->isec = v->it_interval.tv_sec;
+	ie->iusec = v->it_interval.tv_usec;
+	ie->vsec = v->it_value.tv_sec;
+	ie->vusec = v->it_value.tv_usec;
+}
+
+int parasite_dump_itimers_seized(struct parasite_ctl *ctl, struct pstree_item *item)
+{
+	CoreEntry *core = item->core[0];
+	struct parasite_dump_itimers_args *args;
+	int ret;
+
+	args = compel_parasite_args(ctl, struct parasite_dump_itimers_args);
+
+	ret = compel_rpc_call_sync(PARASITE_CMD_DUMP_ITIMERS, ctl);
+	if (ret < 0)
+		return ret;
+
+	encode_itimer((&args->real), (core->tc->timers->real));
+	encode_itimer((&args->virt), (core->tc->timers->virt));
+	encode_itimer((&args->prof), (core->tc->timers->prof));
+
+	return 0;
+}
+
+/*
+ * Allocate one chunk holding @n entry pointers followed by @n entries and
+ * point *@pte at the first entry. On xmalloc() failure -1 is returned.
+ * The chunk will be free()-ed in core_entry_free().
+ */
+static int core_alloc_posix_timers(TaskTimersEntry *tte, int n, PosixTimerEntry **pte)
+{
+	int bytes = n * (sizeof(PosixTimerEntry *) + sizeof(PosixTimerEntry));
+
+	tte->posix = xmalloc(bytes);
+	if (tte->posix == NULL)
+		return -1;
+
+	*pte = (PosixTimerEntry *)(tte->posix + n);
+	tte->n_posix = n;
+	return 0;
+}
+
+/*
+ * Store the timer's notification thread id in @pte. Outside of a pid
+ * namespace the real tid is stored as is; inside one it is translated
+ * to the virtual tid of the matching thread of @item.
+ */
+static int encode_notify_thread_id(pid_t rtid, struct pstree_item *item, PosixTimerEntry *pte)
+{
+	pid_t tid = rtid;
+	int idx;
+
+	if (!rtid)
+		return 0;
+
+	if (root_ns_mask & CLONE_NEWPID) {
+		if (!kdat.has_nspid) {
+			pr_err("Have no NSpid support to dump notify thread id in pid namespace\n");
+			return -1;
+		}
+
+		/* Look the thread up by its real tid, 0 means "not found" */
+		tid = 0;
+		for (idx = 0; idx < item->nr_threads; idx++) {
+			if (item->threads[idx].real == rtid) {
+				tid = item->threads[idx].ns[0].virt;
+				break;
+			}
+		}
+
+		if (!tid) {
+			pr_err("Unable to convert the notify thread id %d\n", rtid);
+			return -1;
+		}
+	}
+
+	pte->notify_thread_id = tid;
+	pte->has_notify_thread_id = true;
+	return 0;
+}
+
+static int encode_posix_timer(struct pstree_item *item, struct posix_timer *v, struct proc_posix_timer *vp,
+			      PosixTimerEntry *pte)
+{
+	pte->it_id = vp->spt.it_id;
+	pte->clock_id = vp->spt.clock_id;
+	pte->si_signo = vp->spt.si_signo;
+	pte->it_sigev_notify = vp->spt.it_sigev_notify;
+	pte->sival_ptr = encode_pointer(vp->spt.sival_ptr);
+
+	pte->overrun = v->overrun;
+
+	pte->isec = v->val.it_interval.tv_sec;
+	pte->insec = v->val.it_interval.tv_nsec;
+	pte->vsec = v->val.it_value.tv_sec;
+	pte->vnsec = v->val.it_value.tv_nsec;
+
+	if (encode_notify_thread_id(vp->spt.notify_thread_id, item, pte))
+		return -1;
+
+	return 0;
+}
+
+int parasite_dump_posix_timers_seized(struct proc_posix_timers_stat *proc_args, struct parasite_ctl *ctl,
+				      struct pstree_item *item)
+{
+	CoreEntry *core = item->core[0];
+	TaskTimersEntry *tte = core->tc->timers;
+	PosixTimerEntry *pte;
+	struct proc_posix_timer *temp;
+	struct parasite_dump_posix_timers_args *args;
+	int ret, exit_code = -1;
+	int args_size;
+	int i;
+
+	if (core_alloc_posix_timers(tte, proc_args->timer_n, &pte))
+		return -1;
+
+	args_size = posix_timers_dump_size(proc_args->timer_n);
+	args = compel_parasite_args_s(ctl, args_size);
+	args->timer_n = proc_args->timer_n;
+
+	i = 0;
+	list_for_each_entry(temp, &proc_args->timers, list) {
+		args->timer[i].it_id = temp->spt.it_id;
+		i++;
+	}
+
+	ret = compel_rpc_call_sync(PARASITE_CMD_DUMP_POSIX_TIMERS, ctl);
+	if (ret < 0)
+		goto end_posix;
+
+	i = 0;
+	list_for_each_entry(temp, &proc_args->timers, list) {
+		posix_timer_entry__init(&pte[i]);
+		if (encode_posix_timer(item, &args->timer[i], temp, &pte[i]))
+			goto end_posix;
+		tte->posix[i] = &pte[i];
+		i++;
+	}
+
+	exit_code = 0;
+end_posix:
+	free_posix_timers(proc_args);
+	return exit_code;
+}
diff --git a/criu/tty.c b/criu/tty.c
index 13f645f3a..9a4520d53 100644
--- a/criu/tty.c
+++ b/criu/tty.c
@@ -22,6 +22,7 @@
 #include "rst-malloc.h"
 #include "log.h"
 #include "common/list.h"
+#include "util.h"
 #include "util-pie.h"
 #include "proc_parse.h"
 #include "file-ids.h"
@@ -258,7 +259,7 @@ static int pts_fd_get_index(int fd, const struct fd_parms *p)
 {
 	int index;
 	const struct fd_link *link = p->link;
-	char *pos = strrchr(link->name, '/');
+	const char *pos = strrchr(link->name, '/');
 
 	if (!pos || pos == (link->name + link->len - 1)) {
 		pr_err("Unexpected format on path %s\n", link->name + 1);
@@ -398,8 +399,7 @@ static int tty_verify_active_pairs(void)
 {
 	unsigned long i, unpaired_slaves = 0;
 
-	for_each_bit(i, tty_active_pairs)
-	{
+	for_each_bit(i, tty_active_pairs) {
 		if ((i % 2) == 0) {
 			if (test_bit(i + 1, tty_active_pairs)) {
 				i++;
@@ -817,8 +817,26 @@ static int do_restore_tty_parms(void *arg, int fd, pid_t pid)
 	 * on termios too. Just to be on the safe side.
 	 */
 
-	if ((p->has & HAS_TERMIOS_L) && ioctl(fd, TIOCSLCKTRMIOS, &p->tl) < 0)
-		goto err;
+	if ((p->has & HAS_TERMIOS_L) && ioctl(fd, TIOCSLCKTRMIOS, &p->tl) < 0) {
+		struct termios t;
+
+		if (errno != EPERM)
+			goto err;
+
+		memzero(&t, sizeof(t));
+		if (ioctl(fd, TIOCGLCKTRMIOS, &t) < 0) {
+			pr_perror("Can't get tty locked params on %#x", p->tty_id);
+			goto err;
+		}
+
+		/*
+		 * The ioctl(TIOCSLCKTRMIOS) requires a CRIU process to be privileged
+		 * in the init_user_ns, but if the current "termios_locked" value is equal
+		 * to the "termios_locked" value from the image, we can safely skip setting it.
+		 */
+		if (memcmp(&t, &p->tl, sizeof(struct termios)) != 0)
+			goto err;
+	}
 
 	if ((p->has & HAS_TERMIOS) && ioctl(fd, TCSETS, &p->t) < 0)
 		goto err;
@@ -868,7 +886,7 @@ static int restore_tty_params(int fd, struct tty_info *info)
 	}
 
 	if (info->tie->has_uid && info->tie->has_gid) {
-		if (fchown(fd, info->tie->uid, info->tie->gid)) {
+		if (cr_fchown(fd, info->tie->uid, info->tie->gid)) {
 			pr_perror("Can't setup uid %d gid %d on %#x", (int)info->tie->uid, (int)info->tie->gid,
 				  info->tfe->id);
 			return -1;
diff --git a/criu/tun.c b/criu/tun.c
index 573137091..9d66f9929 100644
--- a/criu/tun.c
+++ b/criu/tun.c
@@ -121,7 +121,7 @@ static int list_tun_link(NetDeviceEntry *nde, unsigned ns_id)
 	if (!tl)
 		return -1;
 
-	strlcpy(tl->name, nde->name, sizeof(tl->name));
+	__strlcpy(tl->name, nde->name, sizeof(tl->name));
 	/*
 	 * Keep tun-flags not only for persistency fixup (see
 	 * comment below), but also for TUNSETIFF -- we must
@@ -153,7 +153,7 @@ static struct tun_link *__dump_tun_link_fd(int fd, char *name, unsigned ns_id, u
 	tl = xmalloc(sizeof(*tl));
 	if (!tl)
 		goto err;
-	strlcpy(tl->name, name, sizeof(tl->name));
+	__strlcpy(tl->name, name, sizeof(tl->name));
 	tl->ns_id = ns_id;
 	INIT_LIST_HEAD(&tl->l);
 
@@ -241,7 +241,7 @@ static int open_tun_dev(char *name, unsigned int idx, unsigned flags)
 	}
 
 	memset(&ifr, 0, sizeof(ifr));
-	strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+	__strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
 	ifr.ifr_flags = flags;
 
 	if (ioctl(fd, TUNSETIFF, &ifr)) {
@@ -393,7 +393,7 @@ static int tunfile_open(struct file_desc *d, int *new_fd)
 	}
 
 	memset(&ifr, 0, sizeof(ifr));
-	strlcpy(ifr.ifr_name, tl->name, sizeof(ifr.ifr_name));
+	__strlcpy(ifr.ifr_name, tl->name, sizeof(ifr.ifr_name));
 	ifr.ifr_flags = tl->rst.flags;
 
 	if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
@@ -455,27 +455,26 @@ int dump_tun_link(NetDeviceEntry *nde, struct cr_imgset *fds, struct nlattr **in
 	TunLinkEntry tle = TUN_LINK_ENTRY__INIT;
 	char spath[64];
 	char buf[64];
-	int ret = 0;
 	struct tun_link *tl;
 
 	sprintf(spath, "class/net/%s/tun_flags", nde->name);
-	ret |= read_ns_sys_file(spath, buf, sizeof(buf));
+	if (read_ns_sys_file(spath, buf, sizeof(buf)) < 0)
+		return -1;
 	tle.flags = strtol(buf, NULL, 0);
 
 	sprintf(spath, "class/net/%s/owner", nde->name);
-	ret |= read_ns_sys_file(spath, buf, sizeof(buf));
+	if (read_ns_sys_file(spath, buf, sizeof(buf)) < 0)
+		return -1;
 	tle.owner = strtol(buf, NULL, 10);
 
 	sprintf(spath, "class/net/%s/group", nde->name);
-	ret |= read_ns_sys_file(spath, buf, sizeof(buf));
+	if (read_ns_sys_file(spath, buf, sizeof(buf)) < 0)
+		return -1;
 	tle.group = strtol(buf, NULL, 10);
 
-	if (ret < 0)
-		return ret;
-
 	tl = get_tun_link_fd(nde->name, nde->peer_nsid, tle.flags);
 	if (!tl)
-		return ret;
+		return -1;
 
 	tle.vnethdr = tl->dmp.vnethdr;
 	tle.sndbuf = tl->dmp.sndbuf;
diff --git a/criu/uffd.c b/criu/uffd.c
index e07b21b69..8e12dcd63 100644
--- a/criu/uffd.c
+++ b/criu/uffd.c
@@ -668,12 +668,11 @@ static int remap_iovs(struct lazy_pages_info *lpi, unsigned long from, unsigned
  */
 static int collect_iovs(struct lazy_pages_info *lpi)
 {
+	unsigned long start, end, len, nr_pages = 0;
+	int n_vma = 0, max_iov_len = 0, ret = -1;
 	struct page_read *pr = &lpi->pr;
 	struct lazy_iov *iov;
 	MmEntry *mm;
-	int nr_pages = 0, n_vma = 0, max_iov_len = 0;
-	int ret = -1;
-	unsigned long start, end, len;
 
 	mm = init_mm_entry(lpi);
 	if (!mm)
@@ -728,7 +727,7 @@ free_mm:
 	return ret;
 }
 
-static int uffd_io_complete(struct page_read *pr, unsigned long vaddr, int nr);
+static int uffd_io_complete(struct page_read *pr, unsigned long vaddr, unsigned long nr);
 
 static int ud_open(int client, struct lazy_pages_info **_lpi)
 {
@@ -822,7 +821,7 @@ static bool uffd_recoverable_error(int mcopy_rc)
 	return false;
 }
 
-static int uffd_check_op_error(struct lazy_pages_info *lpi, const char *op, int *nr_pages, long mcopy_rc)
+static int uffd_check_op_error(struct lazy_pages_info *lpi, const char *op, unsigned long *nr_pages, long mcopy_rc)
 {
 	if (errno == ENOSPC || errno == ESRCH) {
 		handle_exit(lpi);
@@ -844,7 +843,7 @@ static int uffd_check_op_error(struct lazy_pages_info *lpi, const char *op, int
 	return 0;
 }
 
-static int uffd_copy(struct lazy_pages_info *lpi, __u64 address, int *nr_pages)
+static int uffd_copy(struct lazy_pages_info *lpi, __u64 address, unsigned long *nr_pages)
 {
 	struct uffdio_copy uffdio_copy;
 	unsigned long len = *nr_pages * page_size();
@@ -865,12 +864,12 @@ static int uffd_copy(struct lazy_pages_info *lpi, __u64 address, int *nr_pages)
 	return 0;
 }
 
-static int uffd_io_complete(struct page_read *pr, unsigned long img_addr, int nr)
+static int uffd_io_complete(struct page_read *pr, unsigned long img_addr, unsigned long nr)
 {
 	struct lazy_pages_info *lpi;
-	unsigned long addr = 0;
-	int req_pages, ret;
+	unsigned long addr = 0, req_pages;
 	struct lazy_iov *req;
+	int ret;
 
 	lpi = container_of(pr, struct lazy_pages_info, pr);
 
@@ -920,7 +919,7 @@ static int uffd_io_complete(struct page_read *pr, unsigned long img_addr, int nr
 	return drop_iovs(lpi, addr, nr * PAGE_SIZE);
 }
 
-static int uffd_zero(struct lazy_pages_info *lpi, __u64 address, int nr_pages)
+static int uffd_zero(struct lazy_pages_info *lpi, __u64 address, unsigned long nr_pages)
 {
 	struct uffdio_zeropage uffdio_zeropage;
 	unsigned long len = page_size() * nr_pages;
@@ -946,7 +945,7 @@ static int uffd_zero(struct lazy_pages_info *lpi, __u64 address, int nr_pages)
  * Returns 0 for zero pages, 1 for "real" pages and negative value on
  * error
  */
-static int uffd_seek_pages(struct lazy_pages_info *lpi, __u64 address, int nr)
+static int uffd_seek_pages(struct lazy_pages_info *lpi, __u64 address, unsigned long nr)
 {
 	int ret;
 
@@ -961,7 +960,7 @@ static int uffd_seek_pages(struct lazy_pages_info *lpi, __u64 address, int nr)
 	return 0;
 }
 
-static int uffd_handle_pages(struct lazy_pages_info *lpi, __u64 address, int nr, unsigned flags)
+static int uffd_handle_pages(struct lazy_pages_info *lpi, __u64 address, unsigned long nr, unsigned flags)
 {
 	int ret;
 
@@ -1003,7 +1002,7 @@ static void update_xfer_len(struct lazy_pages_info *lpi, bool pf)
 static int xfer_pages(struct lazy_pages_info *lpi)
 {
 	struct lazy_iov *iov;
-	unsigned int nr_pages;
+	unsigned long nr_pages;
 	unsigned long len;
 	int err;
 
@@ -1098,6 +1097,8 @@ static int handle_fork(struct lazy_pages_info *parent_lpi, struct uffd_msg *msg)
 
 	lpi_get(lpi->parent);
 
+	page_read_disable_dedup(&parent_lpi->pr);
+	page_read_disable_dedup(&lpi->pr);
 	return 1;
 
 out:
diff --git a/criu/unittest/mock.c b/criu/unittest/mock.c
index e517720e4..b2d507278 100644
--- a/criu/unittest/mock.c
+++ b/criu/unittest/mock.c
@@ -5,6 +5,8 @@
 #include <stdint.h>
 #include <stdlib.h>
 
+#include "compel/infect-util.h"
+
 int add_external(char *key)
 {
 	return 0;
@@ -141,4 +143,4 @@ int check_mount_v2(void)
 	return 0;
 }
 
-uint64_t compel_run_id;
+char compel_run_id[RUN_ID_HASH_LENGTH];
diff --git a/criu/util.c b/criu/util.c
index 5f69465b4..2eaad35bb 100644
--- a/criu/util.c
+++ b/criu/util.c
@@ -24,11 +24,11 @@
 #include <sys/resource.h>
 #include <netdb.h>
 #include <netinet/in.h>
-#include <netinet/tcp.h>
 #include <sched.h>
 #include <ftw.h>
 #include <time.h>
 #include <libgen.h>
+#include <uuid/uuid.h>
 
 #include "linux/mount.h"
 
@@ -40,6 +40,7 @@
 #include "mem.h"
 #include "namespaces.h"
 #include "criu-log.h"
+#include "util-caps.h"
 
 #include "clone-noasan.h"
 #include "cr_options.h"
@@ -53,6 +54,7 @@
 #include "action-scripts.h"
 
 #include "compel/infect-util.h"
+#include <compel/plugins/std/syscall-codes.h>
 
 #define VMA_OPT_LEN 128
 
@@ -193,6 +195,7 @@ static void vma_opt_str(const struct vma_area *v, char *opt)
 	opt2s(VMA_ANON_PRIVATE, "ap");
 	opt2s(VMA_AREA_SYSVIPC, "sysv");
 	opt2s(VMA_AREA_SOCKET, "sk");
+	opt2s(VMA_AREA_UPROBES, "uprobes");
 
 #undef opt2s
 }
@@ -219,10 +222,9 @@ int close_safe(int *fd)
 
 	if (*fd > -1) {
 		ret = close(*fd);
-		if (!ret)
-			*fd = -1;
-		else
-			pr_perror("Unable to close fd %d", *fd);
+		if (ret)
+			pr_perror("Failed closing fd %d", *fd);
+		*fd = -1;
 	}
 
 	return ret;
@@ -517,12 +519,25 @@ int cr_system(int in, int out, int err, char *cmd, char *const argv[], unsigned
 	return cr_system_userns(in, out, err, cmd, argv, flags, -1);
 }
 
-static int close_fds(int minfd)
+int cr_close_range(unsigned int fd, unsigned int max_fd, unsigned int flags)
+{
+	return syscall(__NR_close_range, fd, max_fd, flags);
+}
+
+int close_fds(int minfd)
 {
 	DIR *dir;
 	struct dirent *de;
 	int fd, ret, dfd;
 
+	if (kdat.has_close_range) {
+		if (cr_close_range(minfd, ~0, 0)) {
+			pr_perror("close_range failed");
+			return -1;
+		}
+		return 0;
+	}
+
 	dir = opendir("/proc/self/fd");
 	if (dir == NULL) {
 		pr_perror("Can't open /proc/self/fd");
@@ -660,40 +675,54 @@ out:
 	return ret;
 }
 
+struct child_args { /* argument bundle read by child_func() */
+	int *sk_pair; /* two descriptors: child closes [0] and writes to [1] */
+	int (*child_setup)(void); /* optional hook; a non-zero return makes the child exit(1) */
+};
+
+static int child_func(void *_args)
+{
+	struct child_args *cargs = _args;
+	int *pair = cargs->sk_pair;
+	char byte = 0;
+
+	/* Drop end [0] of the pair; only end [1] is used from here on. */
+	close(pair[0]);
+
+	if (cargs->child_setup && cargs->child_setup() != 0)
+		exit(1);
+
+	if (write(pair[1], &byte, 1) != 1) { /* send a single zero byte over the pair */
+		pr_perror("write");
+		exit(1);
+	}
+
+	for (;;) /* sleep forever; this function never returns on its own */
+		sleep(1000);
+	exit(1);
+}
+
 pid_t fork_and_ptrace_attach(int (*child_setup)(void))
 {
 	pid_t pid;
 	int sk_pair[2], sk;
 	char c = 0;
+	struct child_args cargs = {
+		.sk_pair = sk_pair,
+		.child_setup = child_setup,
+	};
 
 	if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair)) {
 		pr_perror("socketpair");
 		return -1;
 	}
 
-	pid = fork();
+	pid = clone_noasan(child_func, CLONE_UNTRACED | SIGCHLD, &cargs);
 	if (pid < 0) {
 		pr_perror("fork");
 		return -1;
 	}
 
-	if (pid == 0) {
-		sk = sk_pair[1];
-		close(sk_pair[0]);
-
-		if (child_setup && child_setup() != 0)
-			exit(1);
-
-		if (write(sk, &c, 1) != 1) {
-			pr_perror("write");
-			exit(1);
-		}
-
-		while (1)
-			sleep(1000);
-		exit(1);
-	}
-
 	sk = sk_pair[0];
 	close(sk_pair[1]);
 
@@ -950,6 +979,89 @@ FILE *fopenat(int dirfd, char *path, char *cflags)
 	return fdopen(tmp, cflags);
 }
 
+int cr_fchown(int fd, uid_t new_uid, gid_t new_gid)
+{
+	struct stat cur;
+
+	if (fchown(fd, new_uid, new_gid) == 0)
+		return 0;
+	if (errno != EPERM)
+		return -1;
+
+	/* fchown() said EPERM: accept it when the fd already has the wanted owner. */
+	if (fstat(fd, &cur) < 0) {
+		pr_perror("fstat() after fchown() for fd %d", fd);
+	} else {
+		pr_debug("fstat(%d): uid %u gid %u\n", fd, cur.st_uid, cur.st_gid);
+		if (cur.st_uid == new_uid && cur.st_gid == new_gid)
+			return 0;
+	}
+
+	/* Any other outcome is reported as the original EPERM, whatever
+	 * fstat() or the log calls may have left in errno. */
+	errno = EPERM;
+	return -1;
+}
+
+int cr_fchpermat(int dirfd, const char *path, uid_t new_uid, gid_t new_gid, mode_t new_mode, int flags)
+{
+	struct stat st;
+	int ret;
+
+	if (fchownat(dirfd, path, new_uid, new_gid, flags) < 0 && errno != EPERM) { /* EPERM is re-checked through the stat below */
+		int errno_cpy = errno;
+		pr_perror("Unable to change [%d]/%s ownership to (%u, %u)",
+			  dirfd, path, new_uid, new_gid);
+		errno = errno_cpy;
+		return -1;
+	}
+
+	if (fstatat(dirfd, path, &st, flags) < 0) {
+		int errno_cpy = errno;
+		pr_perror("Unable to stat [%d]/%s", dirfd, path);
+		errno = errno_cpy;
+		return -1;
+	}
+
+	if (new_uid != st.st_uid || new_gid != st.st_gid) {
+		errno = EPERM; /* so that pr_perror() reports EPERM */
+		pr_perror("Unable to change [%d]/%s ownership (%u, %u) to (%u, %u)",
+			  dirfd, path, st.st_uid, st.st_gid, new_uid, new_gid);
+		errno = EPERM; /* set again after logging, for the caller */
+		return -1;
+	}
+
+	if (new_mode == st.st_mode)
+		return 0;
+
+	if (S_ISLNK(st.st_mode)) {
+		/*
+		 * We have no lchmod() function, and fchmod() will fail on
+		 * O_PATH | O_NOFOLLOW fd. Yes, we have fchmodat()
+		 * function and flag AT_SYMLINK_NOFOLLOW described in
+		 * man 2 fchmodat, but it is not currently implemented. %)
+		 */
+		return 0;
+	}
+
+	if (!*path && flags & AT_EMPTY_PATH)
+		ret = fchmod(dirfd, new_mode); /* empty path: dirfd is the object itself */
+	else
+		ret = fchmodat(dirfd, path, new_mode, flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH));
+	if (ret < 0) {
+		int errno_cpy = errno;
+		pr_perror("Unable to set perms %o on [%d]/%s", new_mode, dirfd, path);
+		errno = errno_cpy;
+	}
+
+	return ret;
+}
+
+int cr_fchperm(int fd, uid_t new_uid, gid_t new_gid, mode_t new_mode)
+{
+	return cr_fchpermat(fd, "", new_uid, new_gid, new_mode, AT_EMPTY_PATH); /* empty path + AT_EMPTY_PATH: act on fd itself */
+}
+
 void split(char *str, char token, char ***out, int *n)
 {
 	int i;
@@ -1070,20 +1182,6 @@ const char *ns_to_string(unsigned int ns)
 	}
 }
 
-void tcp_cork(int sk, bool on)
-{
-	int val = on ? 1 : 0;
-	if (setsockopt(sk, SOL_TCP, TCP_CORK, &val, sizeof(val)))
-		pr_perror("Unable to restore TCP_CORK (%d)", val);
-}
-
-void tcp_nodelay(int sk, bool on)
-{
-	int val = on ? 1 : 0;
-	if (setsockopt(sk, SOL_TCP, TCP_NODELAY, &val, sizeof(val)))
-		pr_perror("Unable to restore TCP_NODELAY (%d)", val);
-}
-
 static int get_sockaddr_in(struct sockaddr_storage *addr, char *host, unsigned short port)
 {
 	memset(addr, 0, sizeof(*addr));
@@ -1425,6 +1523,9 @@ void rlimit_unlimit_nofile(void)
 {
 	struct rlimit new;
 
+	if (opts.unprivileged && !has_cap_sys_resource(opts.cap_eff))
+		return;
+
 	new.rlim_cur = kdat.sysctl_nr_open;
 	new.rlim_max = kdat.sysctl_nr_open;
 
@@ -1455,23 +1556,78 @@ void print_stack_trace(pid_t pid)
 }
 #endif
 
+int cr_fsopen(const char *fsname, unsigned int flags)
+{
+	return syscall(__NR_fsopen, fsname, flags); /* raw syscall(2) by number; its result is returned unchanged */
+}
+
+int cr_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux)
+{
+	int ret = syscall(__NR_fsconfig, fd, cmd, key, value, aux);
+	if (ret) /* any non-zero result: read and log what fd has to report */
+		fsfd_dump_messages(fd);
+	return ret;
+}
+
+int cr_fsmount(int fd, unsigned int flags, unsigned int attr_flags)
+{
+	int ret = syscall(__NR_fsmount, fd, flags, attr_flags); /* fsmount(2): new descriptor on success, -1 on error */
+	if (ret < 0) /* success yields a (normally non-zero) fd, so test the sign rather than non-zero */
+		fsfd_dump_messages(fd);
+	return ret;
+}
+
+void fsfd_dump_messages(int fd) /* read every pending message from fd and log it */
+{
+	char buf[4096];
+	int err, n;
+
+	err = errno; /* restored at the end, so the caller's errno survives the reads */
+
+	for (;;) {
+		n = read(fd, buf, sizeof(buf) - 1);
+		if (n <= 0) {
+			if (n < 0 && errno != ENODATA) /* ENODATA ends the loop silently */
+				pr_perror("Unable to read from fs descriptor");
+			break; /* a zero-length read also stops here instead of spinning */
+		}
+		buf[n] = 0;
+
+		switch (buf[0]) { /* first byte of the message selects the log level */
+		case 'w':
+			pr_warn("%s\n", buf);
+			break;
+		case 'i':
+			pr_info("%s\n", buf);
+			break;
+		case 'e':
+			/* fallthrough */
+		default:
+			pr_err("%s\n", buf);
+			break;
+		}
+	}
+
+	errno = err;
+}
+
 int mount_detached_fs(const char *fsname)
 {
 	int fsfd, fd;
 
-	fsfd = sys_fsopen(fsname, 0);
+	fsfd = cr_fsopen(fsname, 0);
 	if (fsfd < 0) {
 		pr_perror("Unable to open the %s file system", fsname);
 		return -1;
 	}
 
-	if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
+	if (cr_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
 		pr_perror("Unable to create the %s file system", fsname);
 		close(fsfd);
 		return -1;
 	}
 
-	fd = sys_fsmount(fsfd, 0, 0);
+	fd = cr_fsmount(fsfd, 0, 0);
 	if (fd < 0)
 		pr_perror("Unable to mount the %s file system", fsname);
 	close(fsfd);
@@ -1561,7 +1717,7 @@ static int is_iptables_nft(char *bin)
 		goto err;
 	}
 
-	ret = cr_system(-1, pfd[1], -1, cmd[0], cmd, 0);
+	ret = cr_system(-1, pfd[1], -1, cmd[0], cmd, CRS_CAN_FAIL);
 	if (ret) {
 		pr_err("%s -V failed\n", cmd[0]);
 		goto err;
@@ -1589,44 +1745,46 @@ err:
 	return ret;
 }
 
-char *get_legacy_iptables_bin(bool ipv6)
+char *get_legacy_iptables_bin(bool ipv6, bool restore)
 {
-	static char iptables_bin[2][32];
+	static char iptables_bin[2][2][32];
 	/* 0  - means we don't know yet,
 	 * -1 - not present,
 	 * 1  - present.
 	 */
-	static int iptables_present[2] = { 0, 0 };
-	char bins[2][2][32] = { { "iptables-save", "iptables-legacy-save" },
-				{ "ip6tables-save", "ip6tables-legacy-save" } };
+	static int iptables_present[2][2] = { { 0, 0 }, { 0, 0 } };
+	char bins[2][2][2][32] = { { { "iptables-save", "iptables-legacy-save" },
+				     { "iptables-restore", "iptables-legacy-restore" } },
+				   { { "ip6tables-save", "ip6tables-legacy-save" },
+				     { "ip6tables-restore", "ip6tables-legacy-restore" } } };
 	int ret;
 
-	if (iptables_present[ipv6] == -1)
+	if (iptables_present[ipv6][restore] == -1)
 		return NULL;
 
-	if (iptables_present[ipv6] == 1)
-		return iptables_bin[ipv6];
+	if (iptables_present[ipv6][restore] == 1)
+		return iptables_bin[ipv6][restore];
 
-	memcpy(iptables_bin[ipv6], bins[ipv6][0], strlen(bins[ipv6][0]) + 1);
-	ret = is_iptables_nft(iptables_bin[ipv6]);
+	memcpy(iptables_bin[ipv6][restore], bins[ipv6][restore][0], strlen(bins[ipv6][restore][0]) + 1);
+	ret = is_iptables_nft(iptables_bin[ipv6][restore]);
 
 	/*
 	 * iptables on host uses nft backend (or not installed),
 	 * let's try iptables-legacy
 	 */
 	if (ret < 0 || ret == 1) {
-		memcpy(iptables_bin[ipv6], bins[ipv6][1], strlen(bins[ipv6][1]) + 1);
-		ret = is_iptables_nft(iptables_bin[ipv6]);
+		memcpy(iptables_bin[ipv6][restore], bins[ipv6][restore][1], strlen(bins[ipv6][restore][1]) + 1);
+		ret = is_iptables_nft(iptables_bin[ipv6][restore]);
 		if (ret < 0 || ret == 1) {
-			iptables_present[ipv6] = -1;
+			iptables_present[ipv6][restore] = -1;
 			return NULL;
 		}
 	}
 
 	/* we can come here with iptables-save or iptables-legacy-save */
-	iptables_present[ipv6] = 1;
+	iptables_present[ipv6][restore] = 1;
 
-	return iptables_bin[ipv6];
+	return iptables_bin[ipv6][restore];
 }
 
 /*
@@ -1869,15 +2027,16 @@ int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
 	return fret;
 }
 
-uint64_t criu_run_id;
+char criu_run_id[RUN_ID_HASH_LENGTH];
 
-void util_init()
+void util_init(void)
 {
-	struct timespec tp;
+	uuid_t uuid;
 
-	clock_gettime(CLOCK_MONOTONIC, &tp);
-	criu_run_id = ((uint64_t)getpid() << 32) + tp.tv_sec + tp.tv_nsec;
-	compel_run_id = criu_run_id;
+	uuid_generate(uuid);
+	uuid_unparse(uuid, criu_run_id);
+	pr_info("CRIU run id = %s\n", criu_run_id);
+	memcpy(compel_run_id, criu_run_id, sizeof(criu_run_id));
 }
 
 /*
@@ -2063,3 +2222,21 @@ out:
 	xfree(free_path);
 	return mp_path;
 }
+
+int set_opts_cap_eff(void)
+{
+	struct __user_cap_header_struct hdr = {
+		.version = _LINUX_CAPABILITY_VERSION_3,
+		.pid = getpid(),
+	};
+	struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
+	int idx;
+
+	if (capget(&hdr, &data[0]) != 0)
+		return -1;
+
+	for (idx = 0; idx < _LINUX_CAPABILITY_U32S_3; idx++) /* effective set only */
+		memcpy(&opts.cap_eff[idx], &data[idx].effective, sizeof(u32));
+
+	return 0;
+}
diff --git a/criu/vdso.c b/criu/vdso.c
index 1a51f1451..2d9e57c4d 100644
--- a/criu/vdso.c
+++ b/criu/vdso.c
@@ -145,6 +145,9 @@ static void drop_rt_vdso(struct vm_area_list *vma_area_list, struct vdso_quarter
 	 * Also BTW search for rt-vvar to remove it later.
 	 */
 	list_for_each_entry(vma, &vma_area_list->h, list) {
+		if (vma_area_is(vma, VMA_AREA_GUARD))
+			continue;
+
 		if (vma->e->start == addr->orig_vdso) {
 			vma->e->status |= VMA_AREA_REGULAR | VMA_AREA_VDSO;
 			pr_debug("vdso: Restore orig vDSO status at %lx\n", (long)vma->e->start);
@@ -276,6 +279,9 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, struct vm_area_list
 	}
 
 	list_for_each_entry(vma, &vma_area_list->h, list) {
+		if (vma_area_is(vma, VMA_AREA_GUARD))
+			continue;
+
 		/*
 		 * Defer handling marked vdso until we walked over
 		 * all vmas and restore potentially remapped vDSO
@@ -310,7 +316,7 @@ static int vdso_parse_maps(pid_t pid, struct vdso_maps *s)
 
 	while (1) {
 		unsigned long start, end;
-		char *has_vdso, *has_vvar;
+		char *has_vdso, *has_vvar, *has_vvar_vclock;
 
 		buf = breadline(&f);
 		if (buf == NULL)
@@ -318,13 +324,19 @@ static int vdso_parse_maps(pid_t pid, struct vdso_maps *s)
 		if (IS_ERR(buf))
 			goto err;
 
-		has_vdso = strstr(buf, "[vdso]");
-		if (!has_vdso)
+		has_vvar = NULL;
+		has_vvar_vclock = NULL;
+		do {
+			has_vdso = strstr(buf, "[vdso]");
+			if (has_vdso)
+				break;
 			has_vvar = strstr(buf, "[vvar]");
-		else
-			has_vvar = NULL;
+			if (has_vvar)
+				break;
+			has_vvar_vclock = strstr(buf, "[vvar_vclock]");
+		} while (0);
 
-		if (!has_vdso && !has_vvar)
+		if (!has_vdso && !has_vvar && !has_vvar_vclock)
 			continue;
 
 		if (sscanf(buf, "%lx-%lx", &start, &end) != 2) {
@@ -339,13 +351,21 @@ static int vdso_parse_maps(pid_t pid, struct vdso_maps *s)
 			}
 			s->vdso_start = start;
 			s->sym.vdso_size = end - start;
-		} else {
+		} else if (has_vvar) {
 			if (s->vvar_start != VVAR_BAD_ADDR) {
 				pr_err("Got second VVAR entry\n");
 				goto err;
 			}
 			s->vvar_start = start;
 			s->sym.vvar_size = end - start;
+		} else { /* "[vvar_vclock]" line */
+			if (s->vvar_start == VVAR_BAD_ADDR || /* no vvar recorded yet */
+			    s->vvar_start + s->sym.vvar_size != start) { /* must begin where vvar ends */
+				pr_err("VVAR and VVAR_VCLOCK entries are not subsequent\n");
+				goto err;
+			}
+			s->sym.vvar_vclock_size = end - start;
+			s->sym.vvar_size += s->sym.vvar_vclock_size; /* vvar size now covers both areas */
 		}
 	}
 
@@ -479,7 +499,7 @@ out_close:
 	return ret;
 }
 
-#define COMPAT_VDSO_BUF_SZ (PAGE_SIZE * 2)
+#define COMPAT_VDSO_BUF_SZ (PAGE_SIZE * 4)
 static int vdso_fill_compat_symtable(struct vdso_maps *native, struct vdso_maps *compat)
 {
 	void *vdso_mmap;
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 000000000..90c914452
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,61 @@
+{
+  "nodes": {
+    "flake-utils": {
+      "inputs": {
+        "systems": "systems"
+      },
+      "locked": {
+        "lastModified": 1731533236,
+        "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1744463964,
+        "narHash": "sha256-LWqduOgLHCFxiTNYi3Uj5Lgz0SR+Xhw3kr/3Xd0GPTM=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "2631b0b7abcea6e640ce31cd78ea58910d31e650",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-unstable",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "flake-utils": "flake-utils",
+        "nixpkgs": "nixpkgs"
+      }
+    },
+    "systems": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 000000000..dc2429ffc
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,77 @@
+{
+  description = "CRIU development environment";
+
+  inputs = {
+    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+    flake-utils.url = "github:numtide/flake-utils";
+  };
+
+  outputs = { self, nixpkgs, flake-utils }:
+    flake-utils.lib.eachDefaultSystem (system:
+      let
+        pkgs = nixpkgs.legacyPackages.${system};
+
+        # Dependencies for CRIU
+        criuDeps = with pkgs; [
+          # Compiler and build essentials
+          gcc
+          gnumake
+          pkg-config
+
+          # Protocol Buffers
+          protobuf
+          protobufc
+          python3Packages.protobuf
+
+          # Other required libraries
+          libuuid
+          libbsd
+          iproute2
+          nftables
+          libcap
+          libnet
+          libnl
+          libaio
+          gnutls
+          libdrm
+
+          # ZDTM
+          python3Packages.pyyaml
+        ];
+
+        # Multilib support for 32-bit compatibility
+        # criuDeps32bit = with pkgs; [
+        #   glibc.dev
+        #   glibc
+        #   gcc-unwrapped
+        # ];
+
+        devShell = pkgs.mkShell {
+          buildInputs = criuDeps; # ++ (if pkgs.stdenv.isx86_64 then criuDeps32bit else []);
+
+          shellHook = ''
+            echo "CRIU development environment"
+            echo "=============================="
+            echo ""
+            echo "Useful commands:"
+            echo "  make - Build CRIU"
+            echo "  make test - Run tests (requires ZDTM dependencies)"
+            echo ""
+          '';
+
+          # Add proper flags for multilib support
+          # NIX_CFLAGS_COMPILE = pkgs.lib.optional pkgs.stdenv.isx86_64 "-m32";
+
+          # Make sure the shell can find headers for multilib
+          # PKG_CONFIG_PATH = pkgs.lib.makeSearchPath "lib/pkgconfig" criuDeps;
+        };
+      in
+      {
+        # Export the development shell
+        devShells.default = devShell;
+
+        # Build CRIU package as well
+        packages.default = pkgs.criu;
+      }
+    );
+}
diff --git a/images/Makefile b/images/Makefile
index 004e22ec3..2c33152e9 100644
--- a/images/Makefile
+++ b/images/Makefile
@@ -2,10 +2,12 @@ proto-obj-y	+= stats.o
 proto-obj-y	+= core.o
 proto-obj-y	+= core-x86.o
 proto-obj-y	+= core-mips.o
+proto-obj-y	+= core-loongarch64.o
 proto-obj-y	+= core-arm.o
 proto-obj-y	+= core-aarch64.o
 proto-obj-y	+= core-ppc64.o
 proto-obj-y	+= core-s390.o
+proto-obj-y	+= core-riscv64.o
 proto-obj-y	+= cpuinfo.o
 proto-obj-y	+= inventory.o
 proto-obj-y	+= fdinfo.o
@@ -56,7 +58,6 @@ proto-obj-y	+= ext-file.o
 proto-obj-y	+= cgroup.o
 proto-obj-y	+= userns.o
 proto-obj-y	+= pidns.o
-proto-obj-y	+= google/protobuf/descriptor.o # To make protoc-c happy and compile opts.proto
 proto-obj-y	+= opts.o
 proto-obj-y	+= seccomp.o
 proto-obj-y	+= binfmt-misc.o
@@ -72,6 +73,7 @@ proto-obj-y	+= bpfmap-file.o
 proto-obj-y	+= bpfmap-data.o
 proto-obj-y	+= apparmor.o
 proto-obj-y	+= rseq.o
+proto-obj-y	+= pidfd.o
 
 CFLAGS		+= -iquote $(obj)/
 
@@ -88,12 +90,27 @@ endef
 
 makefile-deps := Makefile $(obj)/Makefile
 
+# descriptor.proto comes from protobuf's pkg-config includedir (not always /usr/include).
+# Generate descriptor.pb-c.c and descriptor.pb-c.h to compile opts.proto.
+DESCRIPTOR_DIR := images/google/protobuf
+GOOGLE_INCLUDE=$(shell pkg-config protobuf --variable=includedir)/google/protobuf
+$(DESCRIPTOR_DIR)/descriptor.pb-c.c: $(GOOGLE_INCLUDE)/descriptor.proto
+	$(call msg-gen, $@)
+	$(Q) protoc --proto_path=$(GOOGLE_INCLUDE:%/google/protobuf=%) --proto_path=$(obj)/ --c_out=$(obj)/ $<
+
+cleanup-y += $(DESCRIPTOR_DIR)/descriptor.pb-c.d
+
+submrproper:
+	$(Q) rm -f $(DESCRIPTOR_DIR)/*
+.PHONY: submrproper
+mrproper: submrproper
+
 #
 # Generates rules needed to compile protobuf files.
 define gen-proto-rules
 $(obj)/$(1).pb-c.c $(obj)/$(1).pb-c.h: $(obj)/$(1).proto $(addsuffix .pb-c.c,$(addprefix $(obj)/,$(2))) $(makefile-deps)
 	$$(E) "  PBCC    " $$@
-	$$(Q) protoc-c --proto_path=$(obj)/ --c_out=$(obj)/ $$<
+	$$(Q) protoc --proto_path=$(obj)/ --c_out=$(obj)/ $$<
 ifeq ($(PROTOUFIX),y)
 	$$(Q) sed -i -e 's/4294967295/0xFFFFFFFF/g' $$@
 	$$(Q) sed -i -e 's/4294967295/0xFFFFFFFF/g' $$(patsubst %.c,%.h,$$@)
diff --git a/images/cgroup.proto b/images/cgroup.proto
index ee0354124..02f226835 100644
--- a/images/cgroup.proto
+++ b/images/cgroup.proto
@@ -24,6 +24,7 @@ message cgroup_dir_entry {
 message cg_controller_entry {
 	repeated string			cnames		= 1;
 	repeated cgroup_dir_entry	dirs		= 2;
+	optional bool			is_threaded	= 3;
 }
 
 message cg_member_entry {
diff --git a/images/core-aarch64.proto b/images/core-aarch64.proto
index 3356e6b75..a94911c0b 100644
--- a/images/core-aarch64.proto
+++ b/images/core-aarch64.proto
@@ -17,9 +17,38 @@ message user_aarch64_fpsimd_context_entry {
 	required uint32 fpcr	= 3;
 }
 
+message user_aarch64_gcs_entry {
+	required uint64 gcspr_el0		= 1 [(criu).hex = true];
+	required uint64 features_enabled	= 2 [(criu).hex = true];
+}
+
+message pac_address_keys {
+	required uint64 apiakey_lo = 1;
+	required uint64 apiakey_hi = 2;
+	required uint64 apibkey_lo = 3;
+	required uint64 apibkey_hi = 4;
+	required uint64 apdakey_lo = 5;
+	required uint64 apdakey_hi = 6;
+	required uint64 apdbkey_lo = 7;
+	required uint64 apdbkey_hi = 8;
+	required uint64 pac_enabled_key = 9;
+}
+
+message pac_generic_keys {
+	required uint64 apgakey_lo = 1;
+	required uint64 apgakey_hi = 2;
+}
+
+message pac_keys {
+	optional pac_address_keys pac_address_keys = 6;
+	optional pac_generic_keys pac_generic_keys = 7;
+}
+
 message thread_info_aarch64 {
 	required uint64			 		clear_tid_addr	= 1[(criu).hex = true];
 	required uint64					tls		= 2;
 	required user_aarch64_regs_entry		gpregs		= 3[(criu).hex = true];
 	required user_aarch64_fpsimd_context_entry	fpsimd		= 4;
+	optional pac_keys				pac_keys = 5;
+	optional user_aarch64_gcs_entry			gcs 		= 6;
 }
diff --git a/images/core-loongarch64.proto b/images/core-loongarch64.proto
new file mode 100644
index 000000000..8258f006e
--- /dev/null
+++ b/images/core-loongarch64.proto
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: MIT
+
+syntax = "proto2";
+
+import "opts.proto";
+
+message user_loongarch64_gpregs_entry {
+	repeated uint64 regs	= 1;
+	required uint64 pc		= 2;
+}
+
+message user_loongarch64_fpregs_entry {
+	repeated uint64 regs    = 1;
+	required uint64 fcc     = 2;
+	required uint32 fcsr    = 3;
+}
+
+message thread_info_loongarch64 {
+	required uint64	clear_tid_addr	= 1[(criu).hex = true];
+	required uint64	tls				= 2;
+	required user_loongarch64_gpregs_entry	gpregs	= 3[(criu).hex = true];
+	required user_loongarch64_fpregs_entry	fpregs	= 4[(criu).hex = true];
+}
diff --git a/images/core-mips.proto b/images/core-mips.proto
old mode 100755
new mode 100644
diff --git a/images/core-riscv64.proto b/images/core-riscv64.proto
new file mode 100644
index 000000000..1ddfdd8bd
--- /dev/null
+++ b/images/core-riscv64.proto
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: MIT
+
+syntax = "proto2";
+
+import "opts.proto";
+
+// Refer to riscv-gnu-toolchain/linux-headers/include/asm/ptrace.h
+message user_riscv64_regs_entry {
+	required uint64 pc	= 1;
+	required uint64 ra	= 2;
+	required uint64 sp	= 3;
+	required uint64 gp	= 4;
+	required uint64 tp	= 5;
+	required uint64 t0	= 6;
+	required uint64 t1	= 7;
+	required uint64 t2	= 8;
+	required uint64 s0	= 9;
+	required uint64 s1	= 10;
+	required uint64 a0	= 11;
+	required uint64 a1	= 12;
+	required uint64 a2	= 13;
+	required uint64 a3	= 14;
+	required uint64 a4	= 15;
+	required uint64 a5	= 16;
+	required uint64 a6	= 17;
+	required uint64 a7	= 18;
+	required uint64 s2	= 19;
+	required uint64 s3	= 20;
+	required uint64 s4	= 21;
+	required uint64 s5	= 22;
+	required uint64 s6	= 23;
+	required uint64 s7	= 24;
+	required uint64 s8	= 25;
+	required uint64 s9	= 26;
+	required uint64 s10	= 27;
+	required uint64 s11	= 28;
+	required uint64 t3	= 29;
+	required uint64 t4	= 30;
+	required uint64 t5	= 31;
+	required uint64 t6	= 32;
+}
+
+message user_riscv64_d_ext_entry {
+    repeated uint64 f = 1;
+    required uint32 fcsr = 2;
+}
+
+message thread_info_riscv64 {
+	required uint64				clear_tid_addr	= 1[(criu).hex = true];
+	required uint64				tls		= 2;
+	required user_riscv64_regs_entry	gpregs		= 3[(criu).hex = true];
+	required user_riscv64_d_ext_entry	fpsimd		= 4;
+}
diff --git a/images/core-x86.proto b/images/core-x86.proto
index 815cf21ff..762418d73 100644
--- a/images/core-x86.proto
+++ b/images/core-x86.proto
@@ -41,6 +41,11 @@ message user_x86_regs_entry {
 	optional user_x86_regs_mode	mode		= 28 [default = NATIVE];
 }
 
+message user_x86_cet_entry {
+	required uint64			cet		=  1[(criu).hex = true];
+	required uint64			ssp		=  2[(criu).hex = true];
+}
+
 message user_x86_xsave_entry {
 	/* standard xsave features */
 	required uint64			xstate_bv	=  1;
@@ -60,6 +65,9 @@ message user_x86_xsave_entry {
 	/* Protected keys */
 	repeated uint32			pkru		=  8;
 
+	/* CET */
+	optional user_x86_cet_entry	cet		=  9;
+
 	/*
 	 * Processor trace (PT) and hardware duty cycling (HDC)
 	 * are supervisor state components and only managed by
diff --git a/images/core.proto b/images/core.proto
index 35079f366..1fa23868b 100644
--- a/images/core.proto
+++ b/images/core.proto
@@ -8,6 +8,8 @@ import "core-aarch64.proto";
 import "core-ppc64.proto";
 import "core-s390.proto";
 import "core-mips.proto";
+import "core-loongarch64.proto";
+import "core-riscv64.proto";
 
 import "rlimit.proto";
 import "timer.proto";
@@ -40,6 +42,7 @@ message task_core_entry {
 	optional task_timers_entry	timers		= 7;
 	optional task_rlimits_entry	rlimits		= 8;
 
+	/* This is deprecated, should be per-thread */
 	optional uint32			cg_set		= 9;
 
 	optional signal_queue_entry	signals_s	= 10;
@@ -60,6 +63,10 @@ message task_core_entry {
 	// Reserved for container relative start time
 	//optional uint64		start_time	= 19;
 	optional uint64			blk_sigset_extended = 20[(criu).hex = true];
+
+	optional uint32			stop_signo = 21;
+
+	optional uint32			membarrier_registration_mask	= 22 [(criu).hex = true];
 }
 
 message task_kobj_ids_entry {
@@ -103,6 +110,7 @@ message thread_core_entry {
 	optional string			comm		= 13;
 	optional uint64			blk_sigset_extended	= 14;
 	optional rseq_entry		rseq_entry	= 15;
+	optional uint32			cg_set		= 16;
 }
 
 message task_rlimits_entry {
@@ -118,6 +126,8 @@ message core_entry {
 		PPC64		= 4;
 		S390		= 5;
 		MIPS		= 6;
+		LOONGARCH64		= 7;
+		RISCV64		= 8;
 	}
 
 	required march			mtype		= 1;
@@ -127,6 +137,8 @@ message core_entry {
 	optional thread_info_ppc64	ti_ppc64	= 9;
 	optional thread_info_s390	ti_s390		= 10;
 	optional thread_info_mips	ti_mips		= 11;
+	optional thread_info_loongarch64	ti_loongarch64  = 12;
+	optional thread_info_riscv64	ti_riscv64	= 13;
 
 	optional task_core_entry	tc		= 3;
 	optional task_kobj_ids_entry	ids		= 4;
diff --git a/images/creds.proto b/images/creds.proto
index 6228f7fcb..932a40ccf 100644
--- a/images/creds.proto
+++ b/images/creds.proto
@@ -24,4 +24,7 @@ message creds_entry {
 	optional string lsm_profile = 15;
 	optional string lsm_sockcreate = 16;
 	optional bytes	apparmor_data	= 17;
+	optional uint32	no_new_privs	= 18;
+
+	repeated uint32 cap_amb = 19;
 }
diff --git a/images/fdinfo.proto b/images/fdinfo.proto
index 88f1c1186..32ec13cf4 100644
--- a/images/fdinfo.proto
+++ b/images/fdinfo.proto
@@ -17,6 +17,7 @@ import "ext-file.proto";
 import "sk-unix.proto";
 import "fifo.proto";
 import "pipe.proto";
+import "pidfd.proto";
 import "tty.proto";
 import "memfd.proto";
 import "bpfmap-file.proto";
@@ -42,6 +43,7 @@ enum fd_types {
 	TIMERFD		= 17;
 	MEMFD		= 18;
 	BPFMAP		= 19;
+	PIDFD           = 20;
 
 	/* Any number above the real used. Not stored to image */
 	CTL_TTY		= 65534;
@@ -78,4 +80,5 @@ message file_entry {
 	optional tty_file_entry		tty	= 19;
 	optional memfd_file_entry	memfd	= 20;
 	optional bpfmap_file_entry	bpf	= 21;
+	optional pidfd_entry		pidfd   = 22;
 }
diff --git a/images/google/protobuf/.gitignore b/images/google/protobuf/.gitignore
new file mode 100644
index 000000000..68359a786
--- /dev/null
+++ b/images/google/protobuf/.gitignore
@@ -0,0 +1,2 @@
+*.c
+*.h
diff --git a/images/google/protobuf/descriptor.proto b/images/google/protobuf/descriptor.proto
deleted file mode 120000
index 07a4c9add..000000000
--- a/images/google/protobuf/descriptor.proto
+++ /dev/null
@@ -1 +0,0 @@
-/usr/include/google/protobuf/descriptor.proto
\ No newline at end of file
diff --git a/images/inventory.proto b/images/inventory.proto
index a735bad1d..feed5b850 100644
--- a/images/inventory.proto
+++ b/images/inventory.proto
@@ -10,6 +10,13 @@ enum lsmtype {
 	APPARMOR	= 2;
 }
 
+// It is not possible to distinguish between an empty repeated field
+// and an unset repeated field. To solve this problem and provide backwards
+// compatibility, we use the 'plugins_entry' message.
+message plugins_entry {
+	repeated string			plugins = 12;
+};
+
 message inventory_entry {
 	required uint32			img_version	= 1;
 	optional bool			fdinfo_per_id	= 2;
@@ -21,4 +28,10 @@ message inventory_entry {
 	optional uint32			pre_dump_mode	= 9;
 	optional bool			tcp_close	= 10;
 	optional uint32			network_lock_method	= 11;
+	optional plugins_entry		plugins_entry = 12;
+	// Remember the criu_run_id when CRIU dumped the process.
+	// This is currently used to delete the correct nftables
+	// network locking rule.
+	optional string                 dump_criu_run_id        = 13;
+	optional bool			allow_uprobes	= 14;
 }
diff --git a/images/memfd.proto b/images/memfd.proto
index 0e625416a..bb0be4a6f 100644
--- a/images/memfd.proto
+++ b/images/memfd.proto
@@ -22,4 +22,5 @@ message memfd_inode_entry {
 	required uint32		seals		= 6 [(criu).flags = "seals.flags"];
 	required uint64		inode_id	= 7;
 	optional uint32		hugetlb_flag	= 8;
+	optional uint32		mode		= 9;
 };
diff --git a/images/netdev.proto b/images/netdev.proto
index 748fd0200..42e2bc7d7 100644
--- a/images/netdev.proto
+++ b/images/netdev.proto
@@ -74,4 +74,5 @@ message netns_entry {
 	repeated netns_id nsids		= 7;
 	optional string	ext_key		= 8;
 	repeated sysctl_entry unix_conf	= 9;
+	repeated sysctl_entry ipv4_sysctl = 10;
 }
diff --git a/images/pagemap.proto b/images/pagemap.proto
index e6d341b0f..f2436a51a 100644
--- a/images/pagemap.proto
+++ b/images/pagemap.proto
@@ -10,7 +10,8 @@ message pagemap_head {
 
 message pagemap_entry {
 	required uint64 vaddr		= 1 [(criu).hex = true];
-	required uint32 nr_pages	= 2;
+	required uint32 compat_nr_pages	= 2;
 	optional bool	in_parent	= 3;
 	optional uint32	flags		= 4 [(criu).flags = "pmap.flags" ];
+	optional uint64 nr_pages = 5;
 }
diff --git a/images/pidfd.proto b/images/pidfd.proto
new file mode 100644
index 000000000..a9da3e454
--- /dev/null
+++ b/images/pidfd.proto
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: MIT
+
+syntax = "proto2";
+
+import "fown.proto";
+
+message pidfd_entry {
+	required uint32		id		= 1;
+	required uint32		ino		= 2;
+	required uint32		flags		= 3;
+	required int32		nspid		= 4;
+	required fown_entry	fown		= 5;
+}
diff --git a/images/rpc.proto b/images/rpc.proto
index a6cc5da48..1a4722a9c 100644
--- a/images/rpc.proto
+++ b/images/rpc.proto
@@ -52,6 +52,7 @@ enum criu_cg_mode {
 enum criu_network_lock_method {
 	IPTABLES = 	1;
 	NFTABLES = 	2;
+	SKIP =  	3;
 };
 
 enum criu_pre_dump_mode {
@@ -60,7 +61,8 @@ enum criu_pre_dump_mode {
 };
 
 message criu_opts {
-	required int32			images_dir_fd	= 1;
+	required int32			images_dir_fd	= 1 [default = -1];
+	optional string			images_dir	= 68; /* used only if images_dir_fd == -1 */
 	optional int32			pid		= 2; /* if not set on dump, will dump requesting process */
 
 	optional bool			leave_running	= 3;
@@ -138,6 +140,11 @@ message criu_opts {
 	optional string			lsm_mount_context	= 63;
 	optional criu_network_lock_method	network_lock		= 64 [default = IPTABLES];
 	optional bool			mntns_compat_mode	= 65;
+	optional bool			skip_file_rwx_check	= 66;
+	optional bool			unprivileged		= 67;
+	optional bool			leave_stopped		= 69;
+	optional bool			display_stats		= 70;
+	optional bool			log_to_stderr		= 71;
 /*	optional bool			check_mounts		= 128;	*/
 }
 
diff --git a/images/sk-inet.proto b/images/sk-inet.proto
index 594e29c66..2c709e018 100644
--- a/images/sk-inet.proto
+++ b/images/sk-inet.proto
@@ -5,6 +5,7 @@ syntax = "proto2";
 import "opts.proto";
 import "fown.proto";
 import "sk-opts.proto";
+import "tcp-stream.proto";
 
 message ip_opts_raw_entry {
 	optional bool			hdrincl		= 1;
@@ -17,6 +18,10 @@ message ip_opts_entry {
 	optional bool			freebind	= 1;
 	// Fields 2 and 3 are reserved for vz7 use
 	optional ip_opts_raw_entry	raw		= 4;
+
+	optional bool			pktinfo		= 5;
+	optional uint32			tos		= 6;
+	optional uint32			ttl		= 7;
 }
 
 message inet_sk_entry {
@@ -52,4 +57,5 @@ message inet_sk_entry {
 	optional string			ifname		= 17;
 	optional uint32			ns_id		= 18;
 	optional sk_shutdown		shutdown	= 19;
+	optional tcp_opts_entry		tcp_opts	= 20;
 }
diff --git a/images/sk-opts.proto b/images/sk-opts.proto
index 1d24d47cc..2f9d4e5c3 100644
--- a/images/sk-opts.proto
+++ b/images/sk-opts.proto
@@ -26,9 +26,12 @@ message sk_opts_entry {
 	optional bool		so_reuseport	= 17;
 	optional bool		so_broadcast	= 18;
 	optional bool		so_keepalive	= 19;
+
+	/* These three are deprecated, use tcp_opts_entry instead */
 	optional uint32		tcp_keepcnt	= 20;
 	optional uint32		tcp_keepidle	= 21;
 	optional uint32		tcp_keepintvl	= 22;
+
 	optional uint32		so_oobinline	= 23;
 	optional uint32		so_linger	= 24;
 
diff --git a/images/tcp-stream.proto b/images/tcp-stream.proto
index c2244ba3b..3d834159f 100644
--- a/images/tcp-stream.proto
+++ b/images/tcp-stream.proto
@@ -4,6 +4,14 @@ syntax = "proto2";
 
 import "opts.proto";
 
+message tcp_opts_entry {
+	optional bool		cork		= 1;
+	optional bool		nodelay		= 2;
+	optional uint32		keepcnt		= 3;
+	optional uint32		keepidle	= 4;
+	optional uint32		keepintvl	= 5;
+}
+
 message tcp_stream_entry {
 	required uint32		inq_len		= 1;
 	required uint32		inq_seq		= 2;
@@ -16,6 +24,7 @@ message tcp_stream_entry {
 	optional uint32		rcv_wscale	= 8;
 	optional uint32		timestamp	= 9;
 
+	/* These two are deprecated, use tcp_opts_entry instead */
 	optional bool		cork		= 10;
 	optional bool		nodelay		= 11;
 
diff --git a/include/common/arch/aarch64/asm/page.h b/include/common/arch/aarch64/asm/page.h
index 90670d126..4555debbd 100644
--- a/include/common/arch/aarch64/asm/page.h
+++ b/include/common/arch/aarch64/asm/page.h
@@ -10,7 +10,7 @@
 extern unsigned __page_size;
 extern unsigned __page_shift;
 
-static inline unsigned page_size(void)
+static inline unsigned long page_size(void)
 {
 	if (!__page_size)
 		__page_size = sysconf(_SC_PAGESIZE);
@@ -37,7 +37,7 @@ static inline unsigned page_shift(void)
 
 #else /* CR_NOGLIBC */
 
-extern unsigned page_size(void);
+extern unsigned long page_size(void);
 #define PAGE_SIZE page_size()
 
 #endif /* CR_NOGLIBC */
diff --git a/include/common/arch/loongarch64/asm/atomic.h b/include/common/arch/loongarch64/asm/atomic.h
new file mode 100644
index 000000000..901725439
--- /dev/null
+++ b/include/common/arch/loongarch64/asm/atomic.h
@@ -0,0 +1,62 @@
+#ifndef __CR_ATOMIC_H__
+#define __CR_ATOMIC_H__
+
+#include <linux/types.h>
+#include "common/compiler.h"
+
+typedef struct {
+	int counter;
+} atomic_t;
+
+static inline int atomic_read(const atomic_t *v)
+{
+	return (*(volatile int *)&(v)->counter);
+}
+
+static inline void atomic_set(atomic_t *v, int i)
+{
+	v->counter = i;
+}
+
+static inline int __atomic_add(int i, atomic_t *v)
+{
+	int result;
+	asm volatile("amadd_db.w %1, %2, %0" : "+ZB"(v->counter), "=&r"(result) : "r"(i) : "memory");
+	return result + i;
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+	__atomic_add(i, v);
+}
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	return __atomic_add(i, v);
+}
+
+#define atomic_sub(i, v)	atomic_add(-(int)(i), v) /* (i): negate the whole argument expression */
+#define atomic_sub_return(i, v) atomic_add_return(-(int)(i), v)
+#define atomic_inc(v)		atomic_add(1, v)
+#define atomic_inc_return(v)	atomic_add_return(1, v)
+#define atomic_dec(v)		atomic_sub(1, v)
+#define atomic_dec_return(v)	atomic_sub_return(1, v)
+
+static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
+{
+	int ret;
+	asm volatile("1:                     \n"
+		     " ll.w  %0, %1          \n"
+		     " bne   %0, %2, 2f      \n"
+		     " or    $t0, %3, $zero  \n"
+		     " sc.w  $t0, %1         \n"
+		     " beqz  $t0, 1b         \n"
+		     "2:                     \n"
+		     " dbar  0               \n"
+		     : "=&r"(ret), "+ZB"(ptr->counter)
+		     : "r"(old), "r"(new)
+		     : "t0", "memory");
+	return ret;
+}
+
+#endif /* __CR_ATOMIC_H__ */
diff --git a/include/common/arch/loongarch64/asm/bitops.h b/include/common/arch/loongarch64/asm/bitops.h
new file mode 100644
index 000000000..170e4f736
--- /dev/null
+++ b/include/common/arch/loongarch64/asm/bitops.h
@@ -0,0 +1,24 @@
+#ifndef _LINUX_BITOPS_H
+#define _LINUX_BITOPS_H
+#include "common/asm-generic/bitops.h"
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+
+#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
+#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) /* index of the unsigned long that holds bit nr */
+static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long res, mask;
+	mask = BIT_MASK(nr);
+	asm volatile("amor_db.d  %0, %2, %1" : "=&r"(res), "+ZB"(addr[BIT_WORD(nr)]) : "r"(mask) : "memory");
+	return (res & mask) != 0;
+}
+
+#endif
diff --git a/include/common/arch/loongarch64/asm/bitsperlong.h b/include/common/arch/loongarch64/asm/bitsperlong.h
new file mode 100644
index 000000000..13d06a384
--- /dev/null
+++ b/include/common/arch/loongarch64/asm/bitsperlong.h
@@ -0,0 +1,6 @@
+#ifndef __CR_BITSPERLONG_H__
+#define __CR_BITSPERLONG_H__
+
+#define BITS_PER_LONG _LOONGARCH_SZLONG
+
+#endif /* __CR_BITSPERLONG_H__ */
diff --git a/include/common/arch/loongarch64/asm/linkage.h b/include/common/arch/loongarch64/asm/linkage.h
new file mode 100644
index 000000000..448acc29f
--- /dev/null
+++ b/include/common/arch/loongarch64/asm/linkage.h
@@ -0,0 +1,19 @@
+#ifndef __CR_LINKAGE_H__
+#define __CR_LINKAGE_H__
+
+#define __ALIGN	    .align 2
+#define __ALIGN_STR ".align 2"
+
+#define GLOBAL(name) \
+	.globl name; \
+name:
+
+#define ENTRY(name)            \
+	.globl name;           \
+	__ALIGN;               \
+	.type name, @function; \
+name:
+
+#define END(sym) .size sym, .- sym
+
+#endif /* __CR_LINKAGE_H__ */
diff --git a/include/common/arch/loongarch64/asm/page.h b/include/common/arch/loongarch64/asm/page.h
new file mode 100644
index 000000000..4fcdb64dc
--- /dev/null
+++ b/include/common/arch/loongarch64/asm/page.h
@@ -0,0 +1,39 @@
+#ifndef __CR_ASM_PAGE_H__
+#define __CR_ASM_PAGE_H__
+
+#define ARCH_HAS_LONG_PAGES
+
+#ifndef CR_NOGLIBC
+#include <string.h> /* ffsl() */
+#include <unistd.h> /* _SC_PAGESIZE */
+
+static unsigned __page_size;
+static unsigned __page_shift;
+
+static inline unsigned long page_size(void)
+{
+	if (!__page_size)
+		__page_size = sysconf(_SC_PAGESIZE);
+	return __page_size;
+}
+
+static inline unsigned page_shift(void)
+{
+	if (!__page_shift)
+		__page_shift = (ffsl(page_size()) - 1);
+	return __page_shift;
+}
+
+#define PAGE_SIZE  page_size()
+#define PAGE_SHIFT page_shift()
+#define PAGE_MASK  (~(PAGE_SIZE - 1))
+
+#define PAGE_PFN(addr) ((addr) / PAGE_SIZE)
+#else /* CR_NOGLIBC */
+
+extern unsigned long page_size(void);
+#define PAGE_SIZE page_size()
+
+#endif /* CR_NOGLIBC */
+
+#endif /* __CR_ASM_PAGE_H__ */
diff --git a/include/common/arch/mips/asm/page.h b/include/common/arch/mips/asm/page.h
index 25bdbc141..4fcdb64dc 100644
--- a/include/common/arch/mips/asm/page.h
+++ b/include/common/arch/mips/asm/page.h
@@ -10,7 +10,7 @@
 static unsigned __page_size;
 static unsigned __page_shift;
 
-static inline unsigned page_size(void)
+static inline unsigned long page_size(void)
 {
 	if (!__page_size)
 		__page_size = sysconf(_SC_PAGESIZE);
@@ -31,7 +31,7 @@ static inline unsigned page_shift(void)
 #define PAGE_PFN(addr) ((addr) / PAGE_SIZE)
 #else /* CR_NOGLIBC */
 
-extern unsigned page_size(void);
+extern unsigned long page_size(void);
 #define PAGE_SIZE page_size()
 
 #endif /* CR_NOGLIBC */
diff --git a/include/common/arch/ppc64/asm/bitops.h b/include/common/arch/ppc64/asm/bitops.h
index dbfa6be7f..54d55da16 100644
--- a/include/common/arch/ppc64/asm/bitops.h
+++ b/include/common/arch/ppc64/asm/bitops.h
@@ -46,6 +46,7 @@
 #define BITS_TO_LONGS(nr)  DIV_ROUND_UP(nr, BITS_PER_LONG)
 
 #define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]
+#define BITMAP_SIZE(name)	   (sizeof(name) * CHAR_BIT)
 
 #define __stringify_in_c(...) #__VA_ARGS__
 #define stringify_in_c(...)   __stringify_in_c(__VA_ARGS__) " "
@@ -202,8 +203,8 @@ found_middle:
 	return result + __ffs(tmp);
 }
 
-#define for_each_bit(i, bitmask)                                                  \
-	for (i = find_next_bit(bitmask, sizeof(bitmask), 0); i < sizeof(bitmask); \
-	     i = find_next_bit(bitmask, sizeof(bitmask), i + 1))
+#define for_each_bit(i, bitmask)                                                            \
+	for (i = find_next_bit(bitmask, BITMAP_SIZE(bitmask), 0); i < BITMAP_SIZE(bitmask); \
+	     i = find_next_bit(bitmask, BITMAP_SIZE(bitmask), i + 1))
 
 #endif /* __CR_BITOPS_H__ */
diff --git a/include/common/arch/ppc64/asm/page.h b/include/common/arch/ppc64/asm/page.h
index a1ff6718a..2b0c0e504 100644
--- a/include/common/arch/ppc64/asm/page.h
+++ b/include/common/arch/ppc64/asm/page.h
@@ -10,7 +10,7 @@
 extern unsigned __page_size;
 extern unsigned __page_shift;
 
-static inline unsigned page_size(void)
+static inline unsigned long page_size(void)
 {
 	if (!__page_size)
 		__page_size = sysconf(_SC_PAGESIZE);
@@ -37,7 +37,7 @@ static inline unsigned page_shift(void)
 
 #else /* CR_NOGLIBC */
 
-extern unsigned page_size(void);
+extern unsigned long page_size(void);
 #define PAGE_SIZE page_size()
 
 #endif /* CR_NOGLIBC */
diff --git a/include/common/arch/riscv64/asm/atomic.h b/include/common/arch/riscv64/asm/atomic.h
new file mode 100644
index 000000000..4b08bd9fd
--- /dev/null
+++ b/include/common/arch/riscv64/asm/atomic.h
@@ -0,0 +1,109 @@
+#ifndef __CR_ATOMIC_H__
+#define __CR_ATOMIC_H__
+
+typedef struct {
+	int counter;
+} atomic_t;
+
+/* Copied from the Linux header arch/riscv/include/asm/barrier.h */
+
+#define nop() __asm__ __volatile__("nop")
+
+#define RISCV_FENCE(p, s) __asm__ __volatile__("fence " #p "," #s : : : "memory")
+
+/* These barriers need to enforce ordering on both devices or memory. */
+#define mb()  RISCV_FENCE(iorw, iorw)
+#define rmb() RISCV_FENCE(ir, ir)
+#define wmb() RISCV_FENCE(ow, ow)
+
+/* These barriers do not need to enforce ordering on devices, just memory. */
+#define __smp_mb()  RISCV_FENCE(rw, rw)
+#define __smp_rmb() RISCV_FENCE(r, r)
+#define __smp_wmb() RISCV_FENCE(w, w)
+
+#define __smp_store_release(p, v)                   \
+	do {                                        \
+		compiletime_assert_atomic_type(*p); \
+		RISCV_FENCE(rw, w);                 \
+		WRITE_ONCE(*p, v);                  \
+	} while (0)
+
+#define __smp_load_acquire(p)                       \
+	({                                          \
+		typeof(*p) ___p1 = READ_ONCE(*p);   \
+		compiletime_assert_atomic_type(*p); \
+		RISCV_FENCE(r, rw);                 \
+		___p1;                              \
+	})
+
+/* Copied from the Linux kernel header arch/riscv/include/asm/atomic.h */
+
+static inline int atomic_read(const atomic_t *v)
+{
+	return (*(volatile int *)&(v)->counter);
+}
+
+static inline void atomic_set(atomic_t *v, int i)
+{
+	v->counter = i;
+}
+
+#define atomic_get atomic_read
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	int result;
+
+	asm volatile("amoadd.w.aqrl %1, %2, %0" : "+A"(v->counter), "=r"(result) : "r"(i) : "memory");
+	__smp_mb();
+	return result + i;
+}
+
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+	return atomic_add_return(-i, v);
+}
+
+static inline int atomic_inc(atomic_t *v)
+{
+	return atomic_add_return(1, v) - 1;
+}
+
+static inline int atomic_add(int val, atomic_t *v)
+{
+	return atomic_add_return(val, v) - val;
+}
+
+static inline int atomic_dec(atomic_t *v)
+{
+	return atomic_sub_return(1, v) + 1;
+}
+
+/* true if the result is 0, or false for all other cases. */
+#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
+#define atomic_dec_return(v)   (atomic_sub_return(1, v))
+
+#define atomic_inc_return(v) (atomic_add_return(1, v))
+
+static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
+{
+	unsigned long tmp;
+	int oldval;
+
+	__smp_mb();
+
+	asm volatile("1:\n"
+		     "  lr.w %1, %2\n"
+		     "  bne %1, %3, 2f\n"
+		     "  sc.w %0, %4, %2\n"
+		     "  bnez %0, 1b\n"
+		     "2:"
+		     : "=&r"(tmp), "=&r"(oldval), "+A"(ptr->counter)
+		     : "r"(old), "r"(new)
+		     : "memory");
+
+	__smp_mb();
+	return oldval;
+}
+
+#endif /* __CR_ATOMIC_H__ */
diff --git a/include/common/arch/riscv64/asm/bitops.h b/include/common/arch/riscv64/asm/bitops.h
new file mode 100644
index 000000000..eabab27c7
--- /dev/null
+++ b/include/common/arch/riscv64/asm/bitops.h
@@ -0,0 +1,159 @@
+#ifndef __CR_ASM_BITOPS_H__
+#define __CR_ASM_BITOPS_H__
+
+#include "common/compiler.h"
+#include "common/asm/bitsperlong.h"
+
+#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
+#define BITS_TO_LONGS(nr)  DIV_ROUND_UP(nr, BITS_PER_LONG)
+
+#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]
+#define BITMAP_SIZE(name)	   (sizeof(name) * CHAR_BIT)
+
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
+/* Technically wrong, but this avoids compilation errors on some gcc
+   versions. */
+#define BITOP_ADDR(x) "=m"(*(volatile long *)(x))
+#else
+#define BITOP_ADDR(x) "+m"(*(volatile long *)(x))
+#endif
+
+#define ADDR BITOP_ADDR(addr)
+
+static inline void set_bit(int nr, volatile unsigned long *addr)
+{
+	addr += nr / BITS_PER_LONG;
+	*addr |= (1UL << (nr % BITS_PER_LONG));
+}
+
+static inline void change_bit(int nr, volatile unsigned long *addr)
+{
+	addr += nr / BITS_PER_LONG;
+	*addr ^= (1UL << (nr % BITS_PER_LONG));
+}
+
+static inline int test_bit(int nr, volatile const unsigned long *addr)
+{
+	addr += nr / BITS_PER_LONG;
+	return (*addr & (1UL << (nr % BITS_PER_LONG))) ? -1 : 0;
+}
+
+static inline void clear_bit(int nr, volatile unsigned long *addr)
+{
+	addr += nr / BITS_PER_LONG;
+	*addr &= ~(1UL << (nr % BITS_PER_LONG));
+}
+
+/**
+ * __ffs - find first set bit in word
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+	int p = 0;
+
+	for (; p < 8*sizeof(word); ++p) {
+		if (word & 1) {
+			break;
+		}
+
+		word >>= 1;
+	}
+
+	return p;
+}
+
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+/*
+ * Find the next set bit in a memory region.
+ */
+static inline unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset)
+{
+	const unsigned long *p = addr + BITOP_WORD(offset);
+	unsigned long result = offset & ~(BITS_PER_LONG - 1);
+	unsigned long tmp;
+
+	if (offset >= size)
+		return size;
+	size -= result;
+	offset %= BITS_PER_LONG;
+	if (offset) {
+		tmp = *(p++);
+		tmp &= (~0UL << offset);
+		if (size < BITS_PER_LONG)
+			goto found_first;
+		if (tmp)
+			goto found_middle;
+		size -= BITS_PER_LONG;
+		result += BITS_PER_LONG;
+	}
+	while (size & ~(BITS_PER_LONG - 1)) {
+		if ((tmp = *(p++)))
+			goto found_middle;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	if (!size)
+		return result;
+	tmp = *p;
+
+found_first:
+	tmp &= (~0UL >> (BITS_PER_LONG - size));
+	if (tmp == 0UL)		      /* Are any bits set? */
+		return result + size; /* Nope. */
+found_middle:
+	return result + __ffs(tmp);
+}
+
+#define for_each_bit(i, bitmask)                                                            \
+	for (i = find_next_bit(bitmask, BITMAP_SIZE(bitmask), 0); i < BITMAP_SIZE(bitmask); \
+	     i = find_next_bit(bitmask, BITMAP_SIZE(bitmask), i + 1))
+
+
+#define BITS_PER_LONG 64
+
+#define BIT_MASK(nr) ((1##UL) << ((nr) % BITS_PER_LONG))
+#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
+
+#define __AMO(op) "amo" #op ".d"
+
+#define __test_and_op_bit_ord(op, mod, nr, addr, ord)                        \
+	({                                                                   \
+		unsigned long __res, __mask;                                 \
+		__mask = BIT_MASK(nr);                                       \
+		__asm__ __volatile__(__AMO(op) #ord " %0, %2, %1"            \
+				     : "=r"(__res), "+A"(addr[BIT_WORD(nr)]) \
+				     : "r"(mod(__mask))                      \
+				     : "memory");                            \
+		((__res & __mask) != 0);                                     \
+	})
+
+#define __op_bit_ord(op, mod, nr, addr, ord)                \
+	__asm__ __volatile__(__AMO(op) #ord " zero, %1, %0" \
+			     : "+A"(addr[BIT_WORD(nr)])     \
+			     : "r"(mod(BIT_MASK(nr)))       \
+			     : "memory");
+
+#define __test_and_op_bit(op, mod, nr, addr) __test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
+#define __op_bit(op, mod, nr, addr)	     __op_bit_ord(op, mod, nr, addr, )
+
+/* Bitmask modifiers */
+#define __NOP(x) (x)
+#define __NOT(x) (~(x))
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation may be reordered on other architectures than x86.
+ */
+static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+	return __test_and_op_bit(or, __NOP, nr, addr);
+}
+
+#endif /* __CR_ASM_BITOPS_H__ */
diff --git a/include/common/arch/riscv64/asm/bitsperlong.h b/include/common/arch/riscv64/asm/bitsperlong.h
new file mode 100644
index 000000000..d95727d19
--- /dev/null
+++ b/include/common/arch/riscv64/asm/bitsperlong.h
@@ -0,0 +1,6 @@
+#ifndef __CR_BITSPERLONG_H__
+#define __CR_BITSPERLONG_H__
+
+#define BITS_PER_LONG 64
+
+#endif /* __CR_BITSPERLONG_H__ */
diff --git a/include/common/arch/riscv64/asm/linkage.h b/include/common/arch/riscv64/asm/linkage.h
new file mode 100644
index 000000000..c6d40f750
--- /dev/null
+++ b/include/common/arch/riscv64/asm/linkage.h
@@ -0,0 +1,23 @@
+#ifndef __CR_LINKAGE_H__
+#define __CR_LINKAGE_H__
+
+#ifdef __ASSEMBLY__
+
+#define __ALIGN	    .align 4, 0x00
+#define __ALIGN_STR ".align 4, 0x00"
+
+#define GLOBAL(name) \
+	.globl name; \
+name:
+
+#define ENTRY(name)            \
+	.globl name;           \
+	.type name, @function; \
+	__ALIGN;               \
+name:
+
+#define END(sym) .size sym, .- sym
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __CR_LINKAGE_H__ */
diff --git a/include/common/arch/riscv64/asm/page.h b/include/common/arch/riscv64/asm/page.h
new file mode 100644
index 000000000..5113cb6db
--- /dev/null
+++ b/include/common/arch/riscv64/asm/page.h
@@ -0,0 +1,44 @@
+#ifndef __CR_ASM_PAGE_H__
+#define __CR_ASM_PAGE_H__
+
+#define ARCH_HAS_LONG_PAGES
+
+#ifndef CR_NOGLIBC
+#include <string.h> /* ffsl() */
+#include <unistd.h> /* _SC_PAGESIZE */
+
+extern unsigned __page_size;
+extern unsigned __page_shift;
+
+static inline unsigned long page_size(void) /* same return type as the CR_NOGLIBC declaration below */
+{
+	if (!__page_size)
+		__page_size = sysconf(_SC_PAGESIZE); /* query once, reuse the cached value afterwards */
+	return __page_size;
+}
+
+static inline unsigned page_shift(void)
+{
+	if (!__page_shift)
+		__page_shift = (ffsl(page_size()) - 1);
+	return __page_shift;
+}
+
+/*
+ * Don't add ifdefs for PAGE_SIZE: if any header defines it as a constant
+ * on aarch64, then we need to refrain from using PAGE_SIZE in criu and use
+ * page_size() across sources (as it may differ on aarch64).
+ */
+#define PAGE_SIZE  page_size()
+#define PAGE_MASK  (~(PAGE_SIZE - 1))
+#define PAGE_SHIFT page_shift()
+
+#define PAGE_PFN(addr) ((addr) / PAGE_SIZE)
+
+#else /* CR_NOGLIBC */
+
+extern unsigned long page_size(void);
+#define PAGE_SIZE page_size()
+
+#endif /* CR_NOGLIBC */
+#endif /* __CR_ASM_PAGE_H__ */
diff --git a/include/common/arch/s390/asm/bitops.h b/include/common/arch/s390/asm/bitops.h
index f396721e9..22547c544 100644
--- a/include/common/arch/s390/asm/bitops.h
+++ b/include/common/arch/s390/asm/bitops.h
@@ -10,6 +10,7 @@
 #define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
 
 #define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]
+#define BITMAP_SIZE(name)	   (sizeof(name) * CHAR_BIT)
 
 static inline unsigned long *__bitops_word(unsigned long nr, volatile unsigned long *ptr)
 {
@@ -143,8 +144,8 @@ static inline unsigned long find_next_bit(const unsigned long *addr, unsigned lo
 	return _find_next_bit(addr, size, offset, 0UL);
 }
 
-#define for_each_bit(i, bitmask)                                                  \
-	for (i = find_next_bit(bitmask, sizeof(bitmask), 0); i < sizeof(bitmask); \
-	     i = find_next_bit(bitmask, sizeof(bitmask), i + 1))
+#define for_each_bit(i, bitmask)                                                            \
+	for (i = find_next_bit(bitmask, BITMAP_SIZE(bitmask), 0); i < BITMAP_SIZE(bitmask); \
+	     i = find_next_bit(bitmask, BITMAP_SIZE(bitmask), i + 1))
 
 #endif /* _S390_BITOPS_H */
diff --git a/include/common/arch/x86/asm/bitops.h b/include/common/arch/x86/asm/bitops.h
index c13c1eb45..f3c7dbbdf 100644
--- a/include/common/arch/x86/asm/bitops.h
+++ b/include/common/arch/x86/asm/bitops.h
@@ -10,6 +10,7 @@
 #define BITS_TO_LONGS(nr)  DIV_ROUND_UP(nr, BITS_PER_LONG)
 
 #define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]
+#define BITMAP_SIZE(name)	   (sizeof(name) * CHAR_BIT)
 
 #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
 /* Technically wrong, but this avoids compilation errors on some gcc
@@ -119,8 +120,8 @@ found_middle:
 	return result + __ffs(tmp);
 }
 
-#define for_each_bit(i, bitmask)                                                  \
-	for (i = find_next_bit(bitmask, sizeof(bitmask), 0); i < sizeof(bitmask); \
-	     i = find_next_bit(bitmask, sizeof(bitmask), i + 1))
+#define for_each_bit(i, bitmask)                                                            \
+	for (i = find_next_bit(bitmask, BITMAP_SIZE(bitmask), 0); i < BITMAP_SIZE(bitmask); \
+	     i = find_next_bit(bitmask, BITMAP_SIZE(bitmask), i + 1))
 
 #endif /* __CR_BITOPS_H__ */
diff --git a/include/common/asm-generic/bitops.h b/include/common/asm-generic/bitops.h
index 004da4c4e..d8f38091d 100644
--- a/include/common/asm-generic/bitops.h
+++ b/include/common/asm-generic/bitops.h
@@ -14,6 +14,7 @@
 #define BITS_TO_LONGS(nr)  DIV_ROUND_UP(nr, BITS_PER_LONG)
 
 #define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]
+#define BITMAP_SIZE(name)	   (sizeof(name) * CHAR_BIT)
 
 #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
 /* Technically wrong, but this avoids compilation errors on some gcc
@@ -103,8 +104,8 @@ found_middle:
 	return result + __ffs(tmp);
 }
 
-#define for_each_bit(i, bitmask)                                                  \
-	for (i = find_next_bit(bitmask, sizeof(bitmask), 0); i < sizeof(bitmask); \
-	     i = find_next_bit(bitmask, sizeof(bitmask), i + 1))
+#define for_each_bit(i, bitmask)                                                            \
+	for (i = find_next_bit(bitmask, BITMAP_SIZE(bitmask), 0); i < BITMAP_SIZE(bitmask); \
+	     i = find_next_bit(bitmask, BITMAP_SIZE(bitmask), i + 1))
 
 #endif /* __CR_GENERIC_BITOPS_H__ */
diff --git a/include/common/compiler.h b/include/common/compiler.h
index bd3de01df..3e66709f9 100644
--- a/include/common/compiler.h
+++ b/include/common/compiler.h
@@ -30,6 +30,17 @@
 #define __always_unused __attribute__((unused))
 #define __must_check	__attribute__((__warn_unused_result__))
 
+#ifndef __has_attribute
+#define __has_attribute(x) 0
+#endif
+
+/* Not supported by clang */
+#if __has_attribute(__externally_visible__)
+#define __visible __attribute__((__externally_visible__))
+#else
+#define __visible
+#endif
+
 #define __section(S) __attribute__((__section__(#S)))
 
 #ifndef __always_inline
@@ -47,7 +58,9 @@
 #define noinline __attribute__((noinline))
 #endif
 
+#ifndef __aligned
 #define __aligned(x) __attribute__((aligned(x)))
+#endif
 
 /*
  * Macro to define stack alignment.
@@ -76,6 +89,7 @@
 #define round_down(x, y)   ((x) & ~__round_mask(x, y))
 #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
 #define ALIGN(x, a)	   (((x) + (a)-1) & ~((a)-1))
+#define ALIGN_DOWN(x, a)   ALIGN((x) - ((a) - 1), (a))
 
 #define min(x, y)                              \
 	({                                     \
diff --git a/include/common/lock.h b/include/common/lock.h
index ccfa468b8..4733d7287 100644
--- a/include/common/lock.h
+++ b/include/common/lock.h
@@ -2,6 +2,7 @@
 #define __CR_COMMON_LOCK_H__
 
 #include <stdint.h>
+#include <stdbool.h>
 #include <linux/futex.h>
 #include <sys/time.h>
 #include <limits.h>
@@ -162,6 +163,11 @@ static inline void mutex_lock(mutex_t *m)
 	}
 }
 
+static inline bool mutex_trylock(mutex_t *m)
+{
+	return atomic_inc_return(&m->raw) == 1;
+}
+
 static inline void mutex_unlock(mutex_t *m)
 {
 	uint32_t c = 0;
diff --git a/include/common/scm.h b/include/common/scm.h
index bcb198882..5b6f78a8b 100644
--- a/include/common/scm.h
+++ b/include/common/scm.h
@@ -11,7 +11,7 @@
  * Because of kernel doing kmalloc for user data passed
  * in SCM messages, and there is kernel's SCM_MAX_FD as a limit
  * for descriptors passed at once we're trying to reduce
- * the pressue on kernel memory manager and use predefined
+ * the pressure on kernel memory manager and use predefined
  * known to work well size of the message buffer.
  */
 #define CR_SCM_MSG_SIZE (1024)
diff --git a/lib/.gitignore b/lib/.gitignore
new file mode 100644
index 000000000..a10181b80
--- /dev/null
+++ b/lib/.gitignore
@@ -0,0 +1 @@
+pycriu.egg-info/
diff --git a/lib/Makefile b/lib/Makefile
index 575a7bad3..4b8a6cbb8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -2,10 +2,6 @@ CRIU_SO			:= libcriu.so
 CRIU_A			:= libcriu.a
 UAPI_HEADERS		:= lib/c/criu.h images/rpc.proto images/rpc.pb-c.h criu/include/version.h
 
-#
-# File to keep track of files installed by setup.py
-CRIT_SETUP_FILES	:= lib/.crit-setup.files
-
 all-y	+= lib-c lib-a lib-py
 
 #
@@ -29,23 +25,23 @@ lib-a: lib/c/$(CRIU_A)
 
 #
 # Python bindings.
-lib/py/Makefile: ;
-lib/py/%: .FORCE
+lib/pycriu/Makefile: ;
+lib/pycriu/%: .FORCE
 	$(call msg-gen, $@)
-	$(Q) $(MAKE) $(build)=lib/py $@
+	$(Q) $(MAKE) $(build)=lib/pycriu $@
 lib-py:
-	$(Q) $(MAKE) $(build)=lib/py all
+	$(Q) $(MAKE) $(build)=lib/pycriu all
 .PHONY: lib-py
 
 clean-lib:
 	$(Q) $(MAKE) $(build)=lib/c clean
-	$(Q) $(MAKE) $(build)=lib/py clean
+	$(Q) $(MAKE) $(build)=lib/pycriu clean
 .PHONY: clean-lib
 clean: clean-lib
 cleanup-y	+= lib/c/$(CRIU_SO) lib/c/$(CRIU_A) lib/c/criu.pc
 mrproper: clean
 
-install: lib-c lib-a lib-py crit/crit lib/c/criu.pc.in
+install: lib-c lib-a lib-py lib/c/criu.pc.in
 	$(E) "  INSTALL " lib
 	$(Q) mkdir -p $(DESTDIR)$(LIBDIR)
 	$(Q) install -m 755 lib/c/$(CRIU_SO) $(DESTDIR)$(LIBDIR)/$(CRIU_SO).$(CRIU_SO_VERSION_MAJOR).$(CRIU_SO_VERSION_MINOR)
@@ -58,8 +54,12 @@ install: lib-c lib-a lib-py crit/crit lib/c/criu.pc.in
 	$(Q) mkdir -p $(DESTDIR)$(LIBDIR)/pkgconfig
 	$(Q) sed -e 's,@version@,$(CRIU_VERSION),' -e 's,@libdir@,$(LIBDIR),' -e 's,@includedir@,$(dir $(INCLUDEDIR)/criu/),' lib/c/criu.pc.in > lib/c/criu.pc
 	$(Q) install -m 644 lib/c/criu.pc $(DESTDIR)$(LIBDIR)/pkgconfig
-	$(E) "  INSTALL " crit
-	$(Q) $(PYTHON) scripts/crit-setup.py install --prefix=$(DESTDIR)$(PREFIX) --record $(CRIT_SETUP_FILES)
+ifeq ($(SKIP_PIP_INSTALL),0)
+	$(E) "  INSTALL " pycriu
+	$(Q) $(PYTHON) -m pip install $(PIPFLAGS) --prefix=$(DESTDIR)$(PREFIX) ./lib
+else
+	$(E) " SKIP INSTALL pycriu"
+endif
 .PHONY: install
 
 uninstall:
@@ -71,6 +71,10 @@ uninstall:
 	$(Q) $(RM) $(addprefix $(DESTDIR)$(INCLUDEDIR)/criu/,$(notdir $(UAPI_HEADERS)))
 	$(E) " UNINSTALL" pkgconfig/criu.pc
 	$(Q) $(RM) $(addprefix $(DESTDIR)$(LIBDIR)/pkgconfig/,criu.pc)
-	$(E) " UNINSTALL" crit
-	$(Q) while read -r file; do $(RM) "$$file"; done < $(CRIT_SETUP_FILES)
+ifeq ($(SKIP_PIP_INSTALL),0)
+	$(E) " UNINSTALL" pycriu
+	$(Q) $(PYTHON) ./scripts/uninstall_module.py --prefix=$(DESTDIR)$(PREFIX) pycriu
+else
+	$(E) " SKIP UNINSTALL pycriu"
+endif
 .PHONY: uninstall
diff --git a/lib/c/criu.c b/lib/c/criu.c
index 7807d7bc5..485c8b178 100644
--- a/lib/c/criu.c
+++ b/lib/c/criu.c
@@ -352,8 +352,8 @@ int criu_set_parent_images(const char *path)
 
 int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode)
 {
-	opts->rpc->has_pre_dump_mode = true;
 	if (mode == CRIU_PRE_DUMP_SPLICE || mode == CRIU_PRE_DUMP_READ) {
+		opts->rpc->has_pre_dump_mode = true;
 		opts->rpc->pre_dump_mode = (CriuPreDumpMode)mode;
 		return 0;
 	}
@@ -555,6 +555,28 @@ void criu_set_shell_job(bool shell_job)
 	criu_local_set_shell_job(global_opts, shell_job);
 }
 
+void criu_local_set_skip_file_rwx_check(criu_opts *opts, bool skip_file_rwx_check)
+{
+	opts->rpc->has_skip_file_rwx_check = true;
+	opts->rpc->skip_file_rwx_check = skip_file_rwx_check;
+}
+
+void criu_set_skip_file_rwx_check(bool skip_file_rwx_check)
+{
+	criu_local_set_skip_file_rwx_check(global_opts, skip_file_rwx_check);
+}
+
+void criu_local_set_unprivileged(criu_opts *opts, bool unprivileged)
+{
+	opts->rpc->has_unprivileged = true;
+	opts->rpc->unprivileged = unprivileged;
+}
+
+void criu_set_unprivileged(bool unprivileged)
+{
+	criu_local_set_unprivileged(global_opts, unprivileged);
+}
+
 void criu_local_set_orphan_pts_master(criu_opts *opts, bool orphan_pts_master)
 {
 	opts->rpc->has_orphan_pts_master = true;
@@ -1845,8 +1867,8 @@ void criu_set_pidfd_store_sk(int sk)
 
 int criu_local_set_network_lock(criu_opts *opts, enum criu_network_lock_method method)
 {
-	opts->rpc->has_network_lock = true;
-	if (method == CRIU_NETWORK_LOCK_IPTABLES || method == CRIU_NETWORK_LOCK_NFTABLES) {
+	if (method == CRIU_NETWORK_LOCK_IPTABLES || method == CRIU_NETWORK_LOCK_NFTABLES || method == CRIU_NETWORK_LOCK_SKIP) {
+		opts->rpc->has_network_lock = true;
 		opts->rpc->network_lock = (CriuNetworkLockMethod)method;
 		return 0;
 	}
@@ -2008,3 +2030,33 @@ int criu_feature_check(struct criu_feature_check *features, size_t size)
 {
 	return criu_local_feature_check(global_opts, features, size);
 }
+
+void criu_local_set_empty_ns(criu_opts *opts, int namespaces)
+{
+	opts->rpc->has_empty_ns = true;
+	opts->rpc->empty_ns = namespaces;
+}
+
+void criu_set_empty_ns(int namespaces)
+{
+	criu_local_set_empty_ns(global_opts, namespaces);
+}
+
+int criu_local_set_config_file(criu_opts *opts, const char *path)
+{
+	char *new;
+
+	new = strdup(path);
+	if (!new)
+		return -ENOMEM;
+
+	free(opts->rpc->config_file);
+	opts->rpc->config_file = new;
+
+	return 0;
+}
+
+int criu_set_config_file(const char *path)
+{
+	return criu_local_set_config_file(global_opts, path);
+}
diff --git a/lib/c/criu.h b/lib/c/criu.h
index 7cc6a199c..44446f664 100644
--- a/lib/c/criu.h
+++ b/lib/c/criu.h
@@ -50,6 +50,7 @@ enum criu_cg_mode {
 enum criu_network_lock_method {
 	CRIU_NETWORK_LOCK_IPTABLES = 1,
 	CRIU_NETWORK_LOCK_NFTABLES = 2,
+	CRIU_NETWORK_LOCK_SKIP = 3,
 };
 
 enum criu_pre_dump_mode { CRIU_PRE_DUMP_SPLICE = 1, CRIU_PRE_DUMP_READ = 2 };
@@ -78,6 +79,8 @@ void criu_set_tcp_close(bool tcp_close);
 void criu_set_weak_sysctls(bool val);
 void criu_set_evasive_devices(bool evasive_devices);
 void criu_set_shell_job(bool shell_job);
+void criu_set_skip_file_rwx_check(bool skip_file_rwx_check);
+void criu_set_unprivileged(bool unprivileged);
 void criu_set_orphan_pts_master(bool orphan_pts_master);
 void criu_set_file_locks(bool file_locks);
 void criu_set_track_mem(bool track_mem);
@@ -113,6 +116,7 @@ void criu_set_pidfd_store_sk(int sk);
 int criu_set_network_lock(enum criu_network_lock_method method);
 int criu_join_ns_add(const char *ns, const char *ns_file, const char *extra_opt);
 void criu_set_mntns_compat_mode(bool val);
+int criu_set_config_file(const char *path);
 
 /*
  * The criu_notify_arg_t na argument is an opaque
@@ -238,6 +242,8 @@ void criu_local_set_tcp_close(criu_opts *opts, bool tcp_close);
 void criu_local_set_weak_sysctls(criu_opts *opts, bool val);
 void criu_local_set_evasive_devices(criu_opts *opts, bool evasive_devices);
 void criu_local_set_shell_job(criu_opts *opts, bool shell_job);
+void criu_local_set_skip_file_rwx_check(criu_opts *opts, bool skip_file_rwx_check);
+void criu_local_set_unprivileged(criu_opts *opts, bool unprivileged);
 void criu_local_set_orphan_pts_master(criu_opts *opts, bool orphan_pts_master);
 void criu_local_set_file_locks(criu_opts *opts, bool file_locks);
 void criu_local_set_track_mem(criu_opts *opts, bool track_mem);
@@ -277,6 +283,7 @@ void criu_local_set_pidfd_store_sk(criu_opts *opts, int sk);
 int criu_local_set_network_lock(criu_opts *opts, enum criu_network_lock_method method);
 int criu_local_join_ns_add(criu_opts *opts, const char *ns, const char *ns_file, const char *extra_opt);
 void criu_local_set_mntns_compat_mode(criu_opts *opts, bool val);
+int criu_local_set_config_file(criu_opts *opts, const char *path);
 
 void criu_local_set_notify_cb(criu_opts *opts, int (*cb)(char *action, criu_notify_arg_t na));
 
@@ -319,6 +326,9 @@ struct criu_feature_check {
 int criu_feature_check(struct criu_feature_check *features, size_t size);
 int criu_local_feature_check(criu_opts *opts, struct criu_feature_check *features, size_t size);
 
+void criu_local_set_empty_ns(criu_opts *opts, int namespaces);
+void criu_set_empty_ns(int namespaces);
+
 #ifdef __GNUG__
 }
 #endif
diff --git a/lib/py/.gitignore b/lib/py/.gitignore
deleted file mode 100644
index d3090fca3..000000000
--- a/lib/py/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-*_pb2.py
-*.pyc
diff --git a/lib/py/__init__.py b/lib/py/__init__.py
deleted file mode 100644
index 96b3e9526..000000000
--- a/lib/py/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from . import rpc_pb2 as rpc
-from . import images
-from .criu import *
diff --git a/lib/pycriu/.gitignore b/lib/pycriu/.gitignore
new file mode 100644
index 000000000..111642787
--- /dev/null
+++ b/lib/pycriu/.gitignore
@@ -0,0 +1,4 @@
+__pycache__
+*_pb2.py
+*.pyc
+version.py
diff --git a/lib/py/Makefile b/lib/pycriu/Makefile
similarity index 66%
rename from lib/py/Makefile
rename to lib/pycriu/Makefile
index 691b6bdd3..5ce9bc8f7 100644
--- a/lib/py/Makefile
+++ b/lib/pycriu/Makefile
@@ -1,4 +1,4 @@
-all-y	+= libpy-images rpc_pb2.py
+all-y	+= libpy-images rpc_pb2.py version.py
 
 $(obj)/images/Makefile: ;
 $(obj)/images/%: .FORCE
@@ -11,7 +11,10 @@ libpy-images:
 rpc_pb2.py:
 	$(Q) protoc -I=images/ --python_out=$(obj) images/$(@:_pb2.py=.proto)
 
-cleanup-y	+= $(addprefix $(obj)/,rpc_pb2.py *.pyc)
+version.py:
+	$(Q) echo "__version__ = '${CRIU_VERSION}'" > $(obj)/$@
+
+cleanup-y	+= $(addprefix $(obj)/,rpc_pb2.py *.pyc version.py)
 
 clean-lib-py:
 	$(Q) $(MAKE) $(build)=$(obj)/images clean
diff --git a/lib/pycriu/__init__.py b/lib/pycriu/__init__.py
new file mode 100644
index 000000000..28f1e9424
--- /dev/null
+++ b/lib/pycriu/__init__.py
@@ -0,0 +1,17 @@
+"""pycriu: Python bindings for CRIU (RPC client, image helpers, version)."""
+
+from . import rpc_pb2 as rpc
+from . import images
+from .criu import criu, CRIUExceptionExternal, CRIUException
+from .criu import CR_DEFAULT_SERVICE_ADDRESS
+from .version import __version__
+
+__all__ = (
+    "rpc",
+    "images",
+    "criu",
+    "CRIUExceptionExternal",
+    "CRIUException",
+    "CR_DEFAULT_SERVICE_ADDRESS",
+    "__version__",
+)
diff --git a/lib/py/criu.py b/lib/pycriu/criu.py
similarity index 89%
rename from lib/py/criu.py
rename to lib/pycriu/criu.py
index f3e018095..51a5c2902 100644
--- a/lib/py/criu.py
+++ b/lib/pycriu/criu.py
@@ -8,6 +8,7 @@ import struct
 
 import pycriu.rpc_pb2 as rpc
 
+CR_DEFAULT_SERVICE_ADDRESS = "./criu_service.socket"
 
 class _criu_comm:
     """
@@ -45,7 +46,14 @@ class _criu_comm_sk(_criu_comm):
 
     def connect(self, daemon):
         self.sk = socket.socket(socket.AF_UNIX, socket.SOCK_SEQPACKET)
-        self.sk.connect(self.comm)
+        try:
+            self.sk.connect(self.comm)
+        # ENOENT: nothing exists at the socket path held in self.comm.
+        except FileNotFoundError:
+            raise FileNotFoundError("Socket file not found.")
+        # ECONNREFUSED: the path exists but nothing is listening on it.
+        except ConnectionRefusedError:
+            raise ConnectionRefusedError("Service not running.")
 
         return self.sk
 
@@ -103,7 +111,7 @@ class _criu_comm_bin(_criu_comm):
                 os.close(2)
 
                 css[0].send(struct.pack('i', os.getpid()))
-                os.execv(self.comm,
+                os.execvp(self.comm,
                          [self.comm, 'swrk',
                           "%d" % css[0].fileno()])
                 os._exit(1)
@@ -181,15 +189,14 @@ class CRIUExceptionExternal(CRIUException):
         if self.errno == errno.EBADRQC:
             s += "Bad options"
 
-        if self.typ == rpc.DUMP:
-            if self.errno == errno.ESRCH:
-                s += "No process with such pid"
+        elif self.typ == rpc.DUMP and self.errno == errno.ESRCH:
+            s += "No process with such pid"
 
-        if self.typ == rpc.RESTORE:
-            if self.errno == errno.EEXIST:
-                s += "Process with requested pid already exists"
+        elif self.typ == rpc.RESTORE and self.errno == errno.EEXIST:
+            s += "Process with requested pid already exists"
 
-        s += "Unknown"
+        else:
+            s += "Unknown"
 
         return s
 
@@ -204,10 +211,11 @@ class criu:
 
     def __init__(self):
         self.use_binary('criu')
-        self.opts = rpc.criu_opts()
+        # images_dir_fd is required field with default value of -1
+        self.opts = rpc.criu_opts(images_dir_fd=-1)
         self.sk = None
 
-    def use_sk(self, sk_name):
+    def use_sk(self, sk_name=CR_DEFAULT_SERVICE_ADDRESS):
         """
         Access criu using unix socket which that belongs to criu service daemon.
         """
@@ -234,7 +242,7 @@ class criu:
         # process resources from its own if criu is located in a same
         # process tree it is trying to dump.
         daemon = False
-        if req.type == rpc.DUMP and not req.opts.HasField('pid'):
+        if req.type == rpc.DUMP and (not req.opts.HasField('pid') or req.opts.pid == os.getpid()):
             daemon = True
 
         try:
@@ -266,6 +274,7 @@ class criu:
         """
         req = rpc.criu_req()
         req.type = rpc.CHECK
+        req.opts.MergeFrom(self.opts)
 
         resp = self._send_req_and_recv_resp(req)
 
diff --git a/lib/py/images/.gitignore b/lib/pycriu/images/.gitignore
similarity index 100%
rename from lib/py/images/.gitignore
rename to lib/pycriu/images/.gitignore
diff --git a/lib/py/images/Makefile b/lib/pycriu/images/Makefile
similarity index 100%
rename from lib/py/images/Makefile
rename to lib/pycriu/images/Makefile
diff --git a/lib/py/images/__init__.py b/lib/pycriu/images/__init__.py
similarity index 100%
rename from lib/py/images/__init__.py
rename to lib/pycriu/images/__init__.py
diff --git a/lib/py/images/images.py b/lib/pycriu/images/images.py
similarity index 89%
rename from lib/py/images/images.py
rename to lib/pycriu/images/images.py
index eda030a5c..9db506e1e 100644
--- a/lib/py/images/images.py
+++ b/lib/pycriu/images/images.py
@@ -42,7 +42,6 @@ import base64
 import struct
 import os
 import array
-import sys
 
 from . import magic
 from . import pb
@@ -69,6 +68,16 @@ class MagicException(Exception):
         self.magic = magic
 
 
+def decode_base64_data(data):
+    """Decode a base64 text string (str) and return the raw bytes."""
+    return base64.decodebytes(str.encode(data))
+
+
+def write_base64_data(f, data):
+    """Decode the base64 text string `data` and write the raw bytes to `f`."""
+    f.write(decode_base64_data(data))
+
+
 # Generic class to handle loading/dumping criu images entries from/to bin
 # format to/from dict(json).
 class entry_handler:
@@ -98,7 +107,7 @@ class entry_handler:
             # Read payload
             pbuff = self.payload()
             buf = f.read(4)
-            if buf == b'':
+            if len(buf) == 0:
                 break
             size, = struct.unpack('i', buf)
             pbuff.ParseFromString(f.read(size))
@@ -172,7 +181,7 @@ class entry_handler:
 
         while True:
             buf = f.read(4)
-            if buf == '':
+            if len(buf) == 0:
                 break
             size, = struct.unpack('i', buf)
             f.seek(size, 1)
@@ -195,7 +204,7 @@ class pagemap_handler:
         pbuff = pb.pagemap_head()
         while True:
             buf = f.read(4)
-            if buf == b'':
+            if len(buf) == 0:
                 break
             size, = struct.unpack('i', buf)
             pbuff.ParseFromString(f.read(size))
@@ -285,15 +294,9 @@ class ghost_file_handler:
                 size = len(pb_str)
                 f.write(struct.pack('i', size))
                 f.write(pb_str)
-                if (sys.version_info > (3, 0)):
-                    f.write(base64.decodebytes(str.encode(item['extra'])))
-                else:
-                    f.write(base64.decodebytes(item['extra']))
+                write_base64_data(f, item['extra'])
         else:
-            if (sys.version_info > (3, 0)):
-                f.write(base64.decodebytes(str.encode(item['extra'])))
-            else:
-                f.write(base64.decodebytes(item['extra']))
+            write_base64_data(f, item['extra'])
 
     def dumps(self, entries):
         f = io.BytesIO('')
@@ -314,10 +317,7 @@ class pipes_data_extra_handler:
         return base64.encodebytes(data).decode('utf-8')
 
     def dump(self, extra, f, pload):
-        if (sys.version_info > (3, 0)):
-            data = base64.decodebytes(str.encode(extra))
-        else:
-            data = base64.decodebytes(extra)
+        data = decode_base64_data(extra)
         f.write(data)
 
     def skip(self, f, pload):
@@ -332,10 +332,7 @@ class sk_queues_extra_handler:
         return base64.encodebytes(data).decode('utf-8')
 
     def dump(self, extra, f, _unused):
-        if (sys.version_info > (3, 0)):
-            data = base64.decodebytes(str.encode(extra))
-        else:
-            data = base64.decodebytes(extra)
+        data = decode_base64_data(extra)
         f.write(data)
 
     def skip(self, f, pload):
@@ -356,12 +353,8 @@ class tcp_stream_extra_handler:
         return d
 
     def dump(self, extra, f, _unused):
-        if (sys.version_info > (3, 0)):
-            inq = base64.decodebytes(str.encode(extra['inq']))
-            outq = base64.decodebytes(str.encode(extra['outq']))
-        else:
-            inq = base64.decodebytes(extra['inq'])
-            outq = base64.decodebytes(extra['outq'])
+        inq = decode_base64_data(extra['inq'])
+        outq = decode_base64_data(extra['outq'])
 
         f.write(inq)
         f.write(outq)
@@ -370,6 +363,7 @@ class tcp_stream_extra_handler:
         f.seek(0, os.SEEK_END)
         return pbuff.inq_len + pbuff.outq_len
 
+
 class bpfmap_data_extra_handler:
     def load(self, f, pload):
         size = pload.keys_bytes + pload.values_bytes
@@ -384,14 +378,13 @@ class bpfmap_data_extra_handler:
         f.seek(pload.bytes, os.SEEK_CUR)
         return pload.bytes
 
+
 class ipc_sem_set_handler:
     def load(self, f, pbuff):
         entry = pb2dict.pb2dict(pbuff)
         size = sizeof_u16 * entry['nsems']
         rounded = round_up(size, sizeof_u64)
-        s = array.array('H')
-        if s.itemsize != sizeof_u16:
-            raise Exception("Array size mismatch")
+        s = self._get_sem_array()
         s.frombytes(f.read(size))
         f.seek(rounded - size, 1)
         return s.tolist()
@@ -400,9 +393,7 @@ class ipc_sem_set_handler:
         entry = pb2dict.pb2dict(pbuff)
         size = sizeof_u16 * entry['nsems']
         rounded = round_up(size, sizeof_u64)
-        s = array.array('H')
-        if s.itemsize != sizeof_u16:
-            raise Exception("Array size mismatch")
+        s = self._get_sem_array()
         s.fromlist(extra)
         if len(s) != entry['nsems']:
             raise Exception("Number of semaphores mismatch")
@@ -415,23 +406,16 @@ class ipc_sem_set_handler:
         f.seek(round_up(size, sizeof_u64), os.SEEK_CUR)
         return size
 
+    def _get_sem_array(self):  # empty u16 array shared by load() and dump()
+        s = array.array('H')  # 'H' is the typecode for C unsigned short
+        if s.itemsize != sizeof_u16:  # unsigned short must match sizeof_u16
+            raise Exception("Array size mismatch")
+        return s
+
 
 class ipc_msg_queue_handler:
     def load(self, f, pbuff):
-        entry = pb2dict.pb2dict(pbuff)
-        messages = []
-        for x in range(0, entry['qnum']):
-            buf = f.read(4)
-            if buf == '':
-                break
-            size, = struct.unpack('i', buf)
-            msg = pb.ipc_msg()
-            msg.ParseFromString(f.read(size))
-            rounded = round_up(msg.msize, sizeof_u64)
-            data = f.read(msg.msize)
-            f.seek(rounded - msg.msize, 1)
-            messages.append(pb2dict.pb2dict(msg))
-            messages.append(base64.encodebytes(data).decode('utf-8'))
+        messages, _ = self._read_messages(f, pbuff)
         return messages
 
     def dump(self, extra, f, pbuff):
@@ -443,28 +427,37 @@ class ipc_msg_queue_handler:
             f.write(struct.pack('i', size))
             f.write(msg_str)
             rounded = round_up(msg.msize, sizeof_u64)
-            if (sys.version_info > (3, 0)):
-                data = base64.decodebytes(str.encode(extra[i + 1]))
-            else:
-                data = base64.decodebytes(extra[i + 1])
+            data = decode_base64_data(extra[i + 1])
             f.write(data[:msg.msize])
             f.write(b'\0' * (rounded - msg.msize))
 
     def skip(self, f, pbuff):
+        _, pl_len = self._read_messages(f, pbuff, skip_data=True)
+        return pl_len
+
+    def _read_messages(self, f, pbuff, skip_data=False):
         entry = pb2dict.pb2dict(pbuff)
+        messages = []
         pl_len = 0
         for x in range(0, entry['qnum']):
             buf = f.read(4)
-            if buf == '':
+            if len(buf) == 0:
                 break
             size, = struct.unpack('i', buf)
             msg = pb.ipc_msg()
             msg.ParseFromString(f.read(size))
             rounded = round_up(msg.msize, sizeof_u64)
-            f.seek(rounded, os.SEEK_CUR)
             pl_len += size + msg.msize
 
-        return pl_len
+            if skip_data:
+                f.seek(rounded, os.SEEK_CUR)
+            else:
+                data = f.read(msg.msize)
+                f.seek(rounded - msg.msize, 1)
+                messages.append(pb2dict.pb2dict(msg))
+                messages.append(base64.encodebytes(data).decode('utf-8'))
+
+        return messages, pl_len
 
 
 class ipc_shm_handler:
@@ -560,7 +553,7 @@ handlers = {
     'MEMFD_INODE': entry_handler(pb.memfd_inode_entry),
     'BPFMAP_FILE': entry_handler(pb.bpfmap_file_entry),
     'BPFMAP_DATA': entry_handler(pb.bpfmap_data_entry,
-                                bpfmap_data_extra_handler()),
+                                 bpfmap_data_extra_handler()),
     'APPARMOR': entry_handler(pb.apparmor_entry),
 }
 
@@ -574,12 +567,12 @@ def __rhandler(f):
 
     try:
         m = magic.by_val[img_magic]
-    except:
+    except Exception:
         raise MagicException(img_magic)
 
     try:
         handler = handlers[m]
-    except:
+    except Exception:
         raise Exception("No handler found for image with magic " + m)
 
     return m, handler
@@ -641,7 +634,7 @@ def dump(img, f):
 
     try:
         handler = handlers[m]
-    except:
+    except Exception:
         raise Exception("No handler found for image with such magic")
 
     handler.dump(img['entries'], f)
diff --git a/lib/py/images/pb2dict.py b/lib/pycriu/images/pb2dict.py
similarity index 95%
rename from lib/py/images/pb2dict.py
rename to lib/pycriu/images/pb2dict.py
index 9d581c375..f22887a52 100644
--- a/lib/py/images/pb2dict.py
+++ b/lib/pycriu/images/pb2dict.py
@@ -3,7 +3,6 @@ import collections
 import os
 import quopri
 import socket
-import sys
 from ipaddress import IPv4Address, IPv6Address, ip_address
 
 from google.protobuf.descriptor import FieldDescriptor as FD
@@ -84,6 +83,7 @@ mmap_prot_map = [
 mmap_flags_map = [
     ('MAP_SHARED', 0x1),
     ('MAP_PRIVATE', 0x2),
+    ('MAP_DROPPABLE', 0x08),
     ('MAP_ANON', 0x20),
     ('MAP_GROWSDOWN', 0x0100),
 ]
@@ -103,6 +103,9 @@ mmap_status_map = [
     ('VMA_AREA_SOCKET', 1 << 11),
     ('VMA_AREA_VVAR', 1 << 12),
     ('VMA_AREA_AIORING', 1 << 13),
+    ('VMA_AREA_MEMFD', 1 << 14),
+    ('VMA_AREA_SHSTK', 1 << 15),
+    ('VMA_AREA_UPROBES', 1 << 17),
     ('VMA_UNSUPP', 1 << 31),
 ]
 
@@ -151,8 +154,9 @@ flags_maps = {
 gen_maps = {
     'task_state': {
         1: 'Alive',
-        3: 'Zombie',
-        6: 'Stopped'
+        2: 'Dead',
+        3: 'Stopped',
+        6: 'Zombie',
     },
 }
 
@@ -247,17 +251,11 @@ def encode_dev(field, value):
 
 
 def encode_base64(value):
-    if (sys.version_info > (3, 0)):
-        return base64.encodebytes(value).decode()
-    else:
-        return base64.encodebytes(value)
+    return base64.encodebytes(value).decode()
 
 
 def decode_base64(value):
-    if (sys.version_info > (3, 0)):
-        return base64.decodebytes(str.encode(value))
-    else:
-        return base64.decodebytes(value)
+    return base64.decodebytes(str.encode(value))
 
 
 def encode_unix(value):
@@ -309,7 +307,7 @@ def _pb2dict_cast(field, value, pretty=False, is_hex=False):
         return field.enum_type.values_by_number.get(value, None).name
     elif field.type in _basic_cast:
         cast = _basic_cast[field.type]
-        if pretty and (cast == int):
+        if pretty and cast is int:
             if is_hex:
                 # Fields that have (criu).hex = true option set
                 # should be stored in hex string format.
@@ -364,21 +362,24 @@ def pb2dict(pb, pretty=False, is_hex=False):
         else:
             d_val = _pb2dict_cast(field, value, pretty, is_hex)
 
-        d[field.name] = d_val.decode() if type(d_val) == bytes else d_val
+        try:
+            d[field.name] = d_val.decode()
+        except (UnicodeDecodeError, AttributeError):
+            d[field.name] = d_val
     return d
 
 
 def _dict2pb_cast(field, value):
     # Not considering TYPE_MESSAGE here, as repeated
     # and non-repeated messages need special treatment
-    # in this case, and are hadled separately.
+    # in this case, and are handled separately.
     if field.type == FD.TYPE_BYTES:
         return get_bytes_dec(field)(value)
     elif field.type == FD.TYPE_ENUM:
         return field.enum_type.values_by_name.get(value, None).number
     elif field.type in _basic_cast:
         cast = _basic_cast[field.type]
-        if (cast == int) and is_string(value):
+        if cast is int and is_string(value):
             if _marked_as_dev(field):
                 return encode_dev(field, value)
 
diff --git a/lib/pyproject.toml b/lib/pyproject.toml
new file mode 100644
index 000000000..ea9f88dcc
--- /dev/null
+++ b/lib/pyproject.toml
@@ -0,0 +1,20 @@
+[build-system]
+requires = ["setuptools", "protobuf<4.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "pycriu"
+description = "Python bindings for CRIU"
+authors = [
+    {name = "CRIU team", email = "criu@lists.linux.dev"},
+]
+license = {text = "LGPLv2.1"}
+dynamic = ["version"]
+requires-python = ">=3.6"
+dependencies = ["protobuf"]
+
+[tool.setuptools]
+packages = ["pycriu", "pycriu.images"]
+
+[tool.setuptools.dynamic]
+version = {attr = "pycriu.__version__"}
diff --git a/lib/setup.cfg b/lib/setup.cfg
new file mode 100644
index 000000000..28c9e49c3
--- /dev/null
+++ b/lib/setup.cfg
@@ -0,0 +1,18 @@
+# Configuring setuptools using pyproject.toml files was introduced in setuptools 61.0.0
+# https://setuptools.pypa.io/en/latest/history.html#v61-0-0
+# For older versions of setuptools, we need to use the setup.cfg file
+# https://setuptools.pypa.io/en/latest/userguide/declarative_config.html#declarative-config
+
+[metadata]
+name = pycriu
+description = Python bindings for CRIU
+author = CRIU team
+author_email = criu@lists.linux.dev
+license = LGPLv2.1
+version = attr: pycriu.__version__
+
+[options]
+packages = find:
+python_requires = >=3.6
+install_requires =
+    protobuf
diff --git a/crit/crit-python3 b/lib/setup.py
old mode 100755
new mode 100644
similarity index 55%
rename from crit/crit-python3
rename to lib/setup.py
index 80467cba7..618ac1de4
--- a/crit/crit-python3
+++ b/lib/setup.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
+import setuptools
 
-from pycriu import cli
 
 if __name__ == '__main__':
-	cli.main()
+    setuptools.setup()
diff --git a/plugins/amdgpu/.gitignore b/plugins/amdgpu/.gitignore
new file mode 100644
index 000000000..4e5c8f58e
--- /dev/null
+++ b/plugins/amdgpu/.gitignore
@@ -0,0 +1,3 @@
+*.pb-c.c
+*.pb-c.h
+test_topology_remap
diff --git a/plugins/amdgpu/Makefile b/plugins/amdgpu/Makefile
index 367a52c99..250e7b0e7 100644
--- a/plugins/amdgpu/Makefile
+++ b/plugins/amdgpu/Makefile
@@ -12,10 +12,10 @@ LIBDRM_INC 		:= -I/usr/include/libdrm
 DEPS_OK 		:= amdgpu_plugin.so amdgpu_plugin_test
 DEPS_NOK 		:= ;
 
+__nmk_dir ?= ../../scripts/nmk/scripts/
 include $(__nmk_dir)msg.mk
 
-CC      		:= gcc
-PLUGIN_CFLAGS  		:= -g -Wall -Werror -D _GNU_SOURCE -shared -nostartfiles -fPIC -DCR_PLUGIN_DEFAULT="$(PLUGINDIR)"
+PLUGIN_CFLAGS  		:= -g -Wall -Werror -D _GNU_SOURCE -shared -nostartfiles -fPIC
 PLUGIN_LDFLAGS		:= -lpthread -lrt -ldrm -ldrm_amdgpu
 
 ifeq ($(CONFIG_AMDGPU),y)
@@ -25,10 +25,10 @@ else
 endif
 
 criu-amdgpu.pb-c.c: criu-amdgpu.proto
-		protoc-c --proto_path=. --c_out=. criu-amdgpu.proto
+		protoc --proto_path=. --c_out=. criu-amdgpu.proto
 
-amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_topology.c criu-amdgpu.pb-c.c
-	$(CC) $(PLUGIN_CFLAGS) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) $(LIBDRM_INC)
+amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_drm.c amdgpu_plugin_dmabuf.c amdgpu_plugin_topology.c amdgpu_plugin_util.c criu-amdgpu.pb-c.c amdgpu_socket_utils.c
+	$(CC) $(PLUGIN_CFLAGS) $(DEFINES) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) $(LIBDRM_INC)
 
 amdgpu_plugin_clean:
 	$(call msg-clean, $@)
@@ -53,7 +53,7 @@ install:
 ifeq ($(CONFIG_AMDGPU),y)
 	$(Q) mkdir -p $(DESTDIR)$(PLUGINDIR)
 	$(E) "  INSTALL " $(PLUGIN_NAME)
-	$(Q) install -m 644 $(PLUGIN_SOBJ) $(DESTDIR)$(PLUGINDIR)
+	$(Q) install -m 755 $(PLUGIN_SOBJ) $(DESTDIR)$(PLUGINDIR)
 endif
 .PHONY: install
 
diff --git a/plugins/amdgpu/README.md b/plugins/amdgpu/README.md
index 6809ec8b9..b808fbc4f 100644
--- a/plugins/amdgpu/README.md
+++ b/plugins/amdgpu/README.md
@@ -3,7 +3,8 @@ Supporting ROCm with CRIU
 
 _Felix Kuehling <Felix.Kuehling@amd.com>_<br>
 _Rajneesh Bardwaj <Rajneesh.Bhardwaj@amd.com>_<br>
-_David Yat Sin <David.YatSin@amd.com>_
+_David Yat Sin <David.YatSin@amd.com>_<br>
+_Yanning Yang <yangyanning@sjtu.edu.cn>_
 
 # Introduction
 
@@ -224,6 +225,26 @@ to resume execution on the GPUs.
 *This new plugin is enabled by the new hook `__RESUME_DEVICES_LATE` in our RFC
 patch series.*
 
+## Restoring BO content in parallel
+
+Restoring the BO content is an important part of restoring the GPU state and
+usually takes a significant amount of time. A possible location for this
+procedure is the `cr_plugin_restore_file` hook. However, restoring in this hook
+blocks the target process from performing other restore operations, which
+hinders further optimization of the restore process.
+
+Therefore, a new plugin hook that runs in the master restore process is
+introduced, and it interacts with the `cr_plugin_restore_file` hook to complete
+the restore of BO content. Specifically, the target process only needs to send
+the relevant BOs to the master restore process, while this new hook performs
+the actual restore of the buffer objects. This way, while the BO content is
+being restored, the target process can perform other restore operations, thus
+accelerating the restore procedure. This is an implementation of the gCROP
+method proposed in the ACM SoCC'24 paper: [On-demand and Parallel
+Checkpoint/Restore for GPU Applications](https://dl.acm.org/doi/10.1145/3698038.3698510).
+
+*This optimization technique is enabled by the `__POST_FORKING` hook.*
+
 ## Other CRIU changes
 
 In addition to the new plugins, we need to make some changes to CRIU itself to
@@ -263,7 +284,7 @@ ROCm | Radeon Open Compute Platform
 Thunk | User-mode API interface  to interact with amdgpu.ko
 KFD | AMD Kernel Fusion Driver
 Mesa | Open source OpenGL implementation
-GTT | Graphis Translation Table, also used to denote kernel-managed system memory for GPU access
+GTT | Graphics Translation Table, also used to denote kernel-managed system memory for GPU access
 VRAM | Video RAM
 BO | Buffer Object
 HMM | Heterogeneous Memory Management
diff --git a/plugins/amdgpu/amdgpu_drm.h b/plugins/amdgpu/amdgpu_drm.h
new file mode 100644
index 000000000..69227a12b
--- /dev/null
+++ b/plugins/amdgpu/amdgpu_drm.h
@@ -0,0 +1,1801 @@
+/* amdgpu_drm.h -- Public header for the amdgpu driver -*- linux-c -*-
+ *
+ * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas.
+ * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
+ * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Kevin E. Martin <martin@valinux.com>
+ *    Gareth Hughes <gareth@valinux.com>
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __AMDGPU_DRM_H__
+#define __AMDGPU_DRM_H__
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_AMDGPU_GEM_CREATE		0x00
+#define DRM_AMDGPU_GEM_MMAP		0x01
+#define DRM_AMDGPU_CTX			0x02
+#define DRM_AMDGPU_BO_LIST		0x03
+#define DRM_AMDGPU_CS			0x04
+#define DRM_AMDGPU_INFO			0x05
+#define DRM_AMDGPU_GEM_METADATA		0x06
+#define DRM_AMDGPU_GEM_WAIT_IDLE	0x07
+#define DRM_AMDGPU_GEM_VA		0x08
+#define DRM_AMDGPU_WAIT_CS		0x09
+#define DRM_AMDGPU_GEM_OP		0x10
+#define DRM_AMDGPU_GEM_USERPTR		0x11
+#define DRM_AMDGPU_WAIT_FENCES		0x12
+#define DRM_AMDGPU_VM			0x13
+#define DRM_AMDGPU_FENCE_TO_HANDLE	0x14
+#define DRM_AMDGPU_SCHED		0x15
+#define DRM_AMDGPU_USERQ		0x16
+#define DRM_AMDGPU_USERQ_SIGNAL		0x17
+#define DRM_AMDGPU_USERQ_WAIT		0x18
+#define DRM_AMDGPU_GEM_LIST_HANDLES	0x19
+/* not upstream */
+#define DRM_AMDGPU_GEM_DGMA		0x5c
+
+/* hybrid specific ioctls */
+#define DRM_AMDGPU_SEM			0x5b
+
+#define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
+#define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
+#define DRM_IOCTL_AMDGPU_CTX		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CTX, union drm_amdgpu_ctx)
+#define DRM_IOCTL_AMDGPU_BO_LIST	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_BO_LIST, union drm_amdgpu_bo_list)
+#define DRM_IOCTL_AMDGPU_CS		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CS, union drm_amdgpu_cs)
+#define DRM_IOCTL_AMDGPU_INFO		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_INFO, struct drm_amdgpu_info)
+#define DRM_IOCTL_AMDGPU_GEM_METADATA	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, struct drm_amdgpu_gem_metadata)
+#define DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_WAIT_IDLE, union drm_amdgpu_gem_wait_idle)
+#define DRM_IOCTL_AMDGPU_GEM_VA		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, struct drm_amdgpu_gem_va)
+#define DRM_IOCTL_AMDGPU_WAIT_CS	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs)
+#define DRM_IOCTL_AMDGPU_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
+#define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
+#define DRM_IOCTL_AMDGPU_WAIT_FENCES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
+#define DRM_IOCTL_AMDGPU_VM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
+#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
+#define DRM_IOCTL_AMDGPU_SCHED		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
+#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
+#define DRM_IOCTL_AMDGPU_USERQ_WAIT	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
+#define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
+
+#define DRM_IOCTL_AMDGPU_GEM_DGMA	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_DGMA, struct drm_amdgpu_gem_dgma)
+
+/**
+ * DOC: memory domains
+ *
+ * %AMDGPU_GEM_DOMAIN_CPU	System memory that is not GPU accessible.
+ * Memory in this pool could be swapped out to disk if there is pressure.
+ *
+ * %AMDGPU_GEM_DOMAIN_GTT	GPU accessible system memory, mapped into the
+ * GPU's virtual address space via GART. GART memory linearizes non-contiguous
+ * pages of system memory, allowing the GPU to access system memory in a
+ * linearized fashion.
+ *
+ * %AMDGPU_GEM_DOMAIN_VRAM	Local video memory. For APUs, it is memory
+ * carved out by the BIOS.
+ *
+ * %AMDGPU_GEM_DOMAIN_GDS	Global on-chip data storage used to share data
+ * across shader threads.
+ *
+ * %AMDGPU_GEM_DOMAIN_GWS	Global wave sync, used to synchronize the
+ * execution of all the waves on a device.
+ *
+ * %AMDGPU_GEM_DOMAIN_OA	Ordered append, used by 3D or Compute engines
+ * for appending data.
+ *
+ * %AMDGPU_GEM_DOMAIN_DOORBELL	Doorbell. It is an MMIO region for
+ * signalling user mode queues.
+ *
+ * %AMDGPU_GEM_DOMAIN_MMIO_REMAP	MMIO remap page (special mapping for HDP flushing).
+ */
+/* hybrid specific ioctls */
+#define DRM_IOCTL_AMDGPU_SEM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem)
+
+#define AMDGPU_GEM_DOMAIN_CPU		0x1
+#define AMDGPU_GEM_DOMAIN_GTT		0x2
+#define AMDGPU_GEM_DOMAIN_VRAM		0x4
+#define AMDGPU_GEM_DOMAIN_GDS		0x8
+#define AMDGPU_GEM_DOMAIN_GWS		0x10
+#define AMDGPU_GEM_DOMAIN_OA		0x20
+#define AMDGPU_GEM_DOMAIN_DOORBELL	0x40
+#define AMDGPU_GEM_DOMAIN_MMIO_REMAP	0x80
+#define AMDGPU_GEM_DOMAIN_DGMA		0x400
+#define AMDGPU_GEM_DOMAIN_DGMA_IMPORT	0x800
+
+#define AMDGPU_GEM_DOMAIN_MASK		(AMDGPU_GEM_DOMAIN_CPU | \
+					 AMDGPU_GEM_DOMAIN_GTT | \
+					 AMDGPU_GEM_DOMAIN_VRAM | \
+					 AMDGPU_GEM_DOMAIN_GDS | \
+					 AMDGPU_GEM_DOMAIN_GWS | \
+					 AMDGPU_GEM_DOMAIN_OA |\
+					 AMDGPU_GEM_DOMAIN_DOORBELL |\
+					 AMDGPU_GEM_DOMAIN_MMIO_REMAP |\
+					 AMDGPU_GEM_DOMAIN_DGMA |\
+					 AMDGPU_GEM_DOMAIN_DGMA_IMPORT)
+
+/* Flag that CPU access will be required for the case of VRAM domain */
+#define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED	(1 << 0)
+/* Flag that CPU access will not work, this VRAM domain is invisible */
+#define AMDGPU_GEM_CREATE_NO_CPU_ACCESS		(1 << 1)
+/* Flag that USWC attributes should be used for GTT */
+#define AMDGPU_GEM_CREATE_CPU_GTT_USWC		(1 << 2)
+/* Flag that the memory should be in VRAM and cleared */
+#define AMDGPU_GEM_CREATE_VRAM_CLEARED		(1 << 3)
+/* Flag that allocating the BO should use linear VRAM */
+#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
+/* Flag that BO is always valid in this VM */
+#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID	(1 << 6)
+/* Flag that BO sharing will be explicitly synchronized */
+#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC		(1 << 7)
+/* Flag that indicates allocating MQD gart on GFX9, where the mtype
+ * for the second page onward should be set to NC. It should never
+ * be used by user space applications.
+ */
+#define AMDGPU_GEM_CREATE_CP_MQD_GFX9		(1 << 8)
+/* Flag that BO may contain sensitive data that must be wiped before
+ * releasing the memory
+ */
+#define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE	(1 << 9)
+/* Flag that BO will be encrypted and that the TMZ bit should be
+ * set in the PTEs when mapping this buffer via GPUVM or
+ * accessing it with various hw blocks
+ */
+#define AMDGPU_GEM_CREATE_ENCRYPTED		(1 << 10)
+/* Flag that BO will be used only in preemptible context, which does
+ * not require GTT memory accounting
+ */
+#define AMDGPU_GEM_CREATE_PREEMPTIBLE		(1 << 11)
+/* Flag that BO can be discarded under memory pressure without keeping the
+ * content.
+ */
+#define AMDGPU_GEM_CREATE_DISCARDABLE		(1 << 12)
+/* Flag that BO is shared coherently between multiple devices or CPU threads.
+ * May depend on GPU instructions to flush caches to system scope explicitly.
+ *
+ * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
+ * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
+ */
+#define AMDGPU_GEM_CREATE_COHERENT		(1 << 13)
+/* Flag that BO should not be cached by GPU. Coherent without having to flush
+ * GPU caches explicitly
+ *
+ * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
+ * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
+ */
+#define AMDGPU_GEM_CREATE_UNCACHED		(1 << 14)
+/* Flag that BO should be coherent across devices when using device-level
+ * atomics. May depend on GPU instructions to flush caches to device scope
+ * explicitly, promoting them to system scope automatically.
+ *
+ * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
+ * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
+ */
+#define AMDGPU_GEM_CREATE_EXT_COHERENT		(1 << 15)
+/* Set PTE.D and recompress during GTT->VRAM moves according to TILING flags. */
+#define AMDGPU_GEM_CREATE_GFX12_DCC		(1 << 16)
+
+/* hybrid specific */
+/* Flag that the memory should be in SPARSE resource */
+#define AMDGPU_GEM_CREATE_SPARSE		(1ULL << 29)
+/* Flag that the memory allocation should be from top of domain */
+#define AMDGPU_GEM_CREATE_TOP_DOWN		(1ULL << 30)
+/* Flag that the memory allocation should be pinned */
+#define AMDGPU_GEM_CREATE_NO_EVICT		(1ULL << 31)
+
+struct drm_amdgpu_gem_create_in  {
+	/** the requested memory size */
+	__u64 bo_size;
+	/** physical start_addr alignment in bytes for some HW requirements */
+	__u64 alignment;
+	/** the requested memory domains */
+	__u64 domains;
+	/** allocation flags */
+	__u64 domain_flags;
+};
+
+struct drm_amdgpu_gem_create_out  {
+	/** returned GEM object handle */
+	__u32 handle;
+	__u32 _pad;
+};
+
+union drm_amdgpu_gem_create {
+	struct drm_amdgpu_gem_create_in		in;
+	struct drm_amdgpu_gem_create_out	out;
+};
+
+/** Opcode to create new residency list.  */
+#define AMDGPU_BO_LIST_OP_CREATE	0
+/** Opcode to destroy previously created residency list */
+#define AMDGPU_BO_LIST_OP_DESTROY	1
+/** Opcode to update resource information in the list */
+#define AMDGPU_BO_LIST_OP_UPDATE	2
+
+struct drm_amdgpu_bo_list_in {
+	/** Type of operation */
+	__u32 operation;
+	/** Handle of list or 0 if we want to create one */
+	__u32 list_handle;
+	/** Number of BOs in list  */
+	__u32 bo_number;
+	/** Size of each element describing BO */
+	__u32 bo_info_size;
+	/** Pointer to array describing BOs */
+	__u64 bo_info_ptr;
+};
+
+struct drm_amdgpu_bo_list_entry {
+	/** Handle of BO */
+	__u32 bo_handle;
+	/** New (if specified) BO priority to be used during migration */
+	__u32 bo_priority;
+};
+
+struct drm_amdgpu_bo_list_out {
+	/** Handle of resource list  */
+	__u32 list_handle;
+	__u32 _pad;
+};
+
+union drm_amdgpu_bo_list {
+	struct drm_amdgpu_bo_list_in in;
+	struct drm_amdgpu_bo_list_out out;
+};
+
+/* context related */
+#define AMDGPU_CTX_OP_ALLOC_CTX	1
+#define AMDGPU_CTX_OP_FREE_CTX	2
+#define AMDGPU_CTX_OP_QUERY_STATE	3
+#define AMDGPU_CTX_OP_QUERY_STATE2	4
+#define AMDGPU_CTX_OP_GET_STABLE_PSTATE	5
+#define AMDGPU_CTX_OP_SET_STABLE_PSTATE	6
+
+/* GPU reset status */
+#define AMDGPU_CTX_NO_RESET		0
+/* this context caused it */
+#define AMDGPU_CTX_GUILTY_RESET		1
+/* some other context caused it */
+#define AMDGPU_CTX_INNOCENT_RESET	2
+/* unknown cause */
+#define AMDGPU_CTX_UNKNOWN_RESET	3
+
+/* indicate gpu reset occurred after ctx created */
+#define AMDGPU_CTX_QUERY2_FLAGS_RESET    (1<<0)
+/* indicate vram lost occurred after ctx created */
+#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1)
+/* indicate some job from this context once caused a gpu hang */
+#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY   (1<<2)
+/* indicate some errors are detected by RAS */
+#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE   (1<<3)
+#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE   (1<<4)
+/* indicate that the reset hasn't completed yet */
+#define AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS (1<<5)
+
+/* Context priority level */
+#define AMDGPU_CTX_PRIORITY_UNSET       -2048
+#define AMDGPU_CTX_PRIORITY_VERY_LOW    -1023
+#define AMDGPU_CTX_PRIORITY_LOW         -512
+#define AMDGPU_CTX_PRIORITY_NORMAL      0
+/*
+ * When used in struct drm_amdgpu_ctx_in, a priority above NORMAL requires
+ * CAP_SYS_NICE or DRM_MASTER
+*/
+#define AMDGPU_CTX_PRIORITY_HIGH        512
+#define AMDGPU_CTX_PRIORITY_VERY_HIGH   1023
+
+/* select a stable profiling pstate for perfmon tools */
+#define AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK  0xf
+#define AMDGPU_CTX_STABLE_PSTATE_NONE  0
+#define AMDGPU_CTX_STABLE_PSTATE_STANDARD  1
+#define AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK  2
+#define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK  3
+#define AMDGPU_CTX_STABLE_PSTATE_PEAK  4
+
+struct drm_amdgpu_ctx_in {
+	/** AMDGPU_CTX_OP_* */
+	__u32	op;
+	/** Flags */
+	__u32	flags;
+	__u32	ctx_id;
+	/** AMDGPU_CTX_PRIORITY_* */
+	__s32	priority;
+};
+
+union drm_amdgpu_ctx_out {
+		struct {
+			__u32	ctx_id;
+			__u32	_pad;
+		} alloc;
+
+		struct {
+			/** For future use, no flags defined so far */
+			__u64	flags;
+			/** Number of resets caused by this context so far. */
+			__u32	hangs;
+			/** Reset status since the last call of the ioctl. */
+			__u32	reset_status;
+		} state;
+
+		struct {
+			__u32	flags;
+			__u32	_pad;
+		} pstate;
+};
+
+union drm_amdgpu_ctx {
+	struct drm_amdgpu_ctx_in in;
+	union drm_amdgpu_ctx_out out;
+};
+
+/* user queue IOCTL operations */
+#define AMDGPU_USERQ_OP_CREATE	1
+#define AMDGPU_USERQ_OP_FREE	2
+
+/* queue priority levels */
+/* low < normal low < normal high < high */
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK  0x3
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT 0
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW 0
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */
+/* for queues that need access to protected content */
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE  (1 << 2)
+
+/*
+ * This structure is a container to pass input configuration
+ * info for all supported userqueue related operations.
+ * For operation AMDGPU_USERQ_OP_CREATE: user is expected
+ *  to set all fields, except the parameter 'queue_id'.
+ * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected
+ *  to be set is 'queue_id', everything else is ignored.
+ */
+struct drm_amdgpu_userq_in {
+	/** AMDGPU_USERQ_OP_* */
+	__u32	op;
+	/** Queue id passed for operation USERQ_OP_FREE */
+	__u32	queue_id;
+	/** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */
+	__u32   ip_type;
+	/**
+	 * @doorbell_handle: the handle of doorbell GEM object
+	 * associated with this userqueue client.
+	 */
+	__u32   doorbell_handle;
+	/**
+	 * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo.
+	 * Kernel will generate absolute doorbell offset using doorbell_handle
+	 * and doorbell_offset in the doorbell bo.
+	 */
+	__u32   doorbell_offset;
+	/**
+	 * @flags: flags used for queue parameters
+	 */
+	__u32 flags;
+	/**
+	 * @queue_va: Virtual address of the GPU memory which holds the queue
+	 * object. The queue holds the workload packets.
+	 */
+	__u64   queue_va;
+	/**
+	 * @queue_size: Size of the queue in bytes, this needs to be 256-byte
+	 * aligned.
+	 */
+	__u64   queue_size;
+	/**
+	 * @rptr_va: Virtual address of the GPU memory which holds the ring RPTR.
+	 * This object must be at least 8 bytes in size and aligned to 8-byte offset.
+	 */
+	__u64   rptr_va;
+	/**
+	 * @wptr_va: Virtual address of the GPU memory which holds the ring WPTR.
+	 * This object must be at least 8 bytes in size and aligned to 8-byte offset.
+	 *
+	 * Queue, RPTR and WPTR can come from the same object, as long as the size
+	 * and alignment related requirements are met.
+	 */
+	__u64   wptr_va;
+	/**
+	 * @mqd: MQD (memory queue descriptor) is a set of parameters which allow
+	 * the GPU to uniquely define and identify a usermode queue.
+	 *
+	 * MQD data can be of different size for different GPU IP/engine and
+	 * their respective versions/revisions, so this points to a __u64 *
+	 * which holds IP specific MQD of this usermode queue.
+	 */
+	__u64 mqd;
+	/**
+	 * @mqd_size: size of MQD data in bytes; it must match the MQD structure
+	 * size of the respective engine/revision defined in UAPI, e.g. for
+	 * gfx11 workloads, mqd_size = sizeof(struct drm_amdgpu_userq_mqd_gfx11).
+	 */
+	__u64 mqd_size;
+};
+
+/* The structure to carry output of userqueue ops */
+struct drm_amdgpu_userq_out {
+	/**
+	 * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique
+	 * queue ID to represent the newly created userqueue in the system, otherwise
+	 * it should be ignored.
+	 */
+	__u32	queue_id;
+	__u32 _pad;
+};
+
+union drm_amdgpu_userq {
+	struct drm_amdgpu_userq_in in;
+	struct drm_amdgpu_userq_out out;
+};
+
+/* GFX V11 IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_gfx11 {
+	/**
+	 * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer.
+	 * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
+	 */
+	__u64   shadow_va;
+	/**
+	 * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
+	 * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
+	 */
+	__u64   csa_va;
+};
+
+/* GFX V11 SDMA IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_sdma_gfx11 {
+	/**
+	 * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
+	 * This must be from a separate GPU object; use the AMDGPU_INFO IOCTL
+	 * to get the size.
+	 */
+	__u64   csa_va;
+};
+
+/* GFX V11 Compute IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_compute_gfx11 {
+	/**
+	 * @eop_va: Virtual address of the GPU memory to hold the EOP buffer.
+	 * This must be from a separate GPU object; use the AMDGPU_INFO IOCTL
+	 * to get the size.
+	 */
+	__u64   eop_va;
+};
+
+/* userq signal/wait ioctl */
+struct drm_amdgpu_userq_signal {
+	/**
+	 * @queue_id: Queue handle used by the userq fence creation function
+	 * to retrieve the WPTR.
+	 */
+	__u32	queue_id;
+	__u32	pad;
+	/**
+	 * @syncobj_handles: The list of syncobj handles submitted by the user queue
+	 * job to be signaled.
+	 */
+	__u64	syncobj_handles;
+	/**
+	 * @num_syncobj_handles: A count that represents the number of syncobj handles in
+	 * @syncobj_handles.
+	 */
+	__u64	num_syncobj_handles;
+	/**
+	 * @bo_read_handles: The list of BO handles that the submitted user queue job
+	 * is using for read only. This will update BO fences in the kernel.
+	 */
+	__u64	bo_read_handles;
+	/**
+	 * @bo_write_handles: The list of BO handles that the submitted user queue job
+	 * is using for write only. This will update BO fences in the kernel.
+	 */
+	__u64	bo_write_handles;
+	/**
+	 * @num_bo_read_handles: A count that represents the number of read BO handles in
+	 * @bo_read_handles.
+	 */
+	__u32	num_bo_read_handles;
+	/**
+	 * @num_bo_write_handles: A count that represents the number of write BO handles in
+	 * @bo_write_handles.
+	 */
+	__u32	num_bo_write_handles;
+};
+
+struct drm_amdgpu_userq_fence_info {
+	/**
+	 * @va: A gpu address allocated for each queue which stores the
+	 * read pointer (RPTR) value.
+	 */
+	__u64	va;
+	/**
+	 * @value: A 64-bit value representing the write pointer (WPTR) of the
+	 * queue commands, which is compared with the RPTR value to signal the
+	 * fences.
+	 */
+	__u64	value;
+};
+
+struct drm_amdgpu_userq_wait {
+	/**
+	 * @waitq_id: Queue handle used by the userq wait IOCTL to retrieve the
+	 * wait queue and maintain the fence driver references in it.
+	 */
+	__u32	waitq_id;
+	__u32	pad;
+	/**
+	 * @syncobj_handles: The list of syncobj handles submitted by the user queue
+	 * job to get the va/value pairs.
+	 */
+	__u64	syncobj_handles;
+	/**
+	 * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by
+	 * the user queue job to get the va/value pairs at given @syncobj_timeline_points.
+	 */
+	__u64	syncobj_timeline_handles;
+	/**
+	 * @syncobj_timeline_points: The list of timeline syncobj points submitted by the
+	 * user queue job for the corresponding @syncobj_timeline_handles.
+	 */
+	__u64	syncobj_timeline_points;
+	/**
+	 * @bo_read_handles: The list of read BO handles submitted by the user queue
+	 * job to get the va/value pairs.
+	 */
+	__u64	bo_read_handles;
+	/**
+	 * @bo_write_handles: The list of write BO handles submitted by the user queue
+	 * job to get the va/value pairs.
+	 */
+	__u64	bo_write_handles;
+	/**
+	 * @num_syncobj_timeline_handles: A count that represents the number of timeline
+	 * syncobj handles in @syncobj_timeline_handles.
+	 */
+	__u16	num_syncobj_timeline_handles;
+	/**
+	 * @num_fences: This field can be used both as input and output. As input it defines
+	 * the maximum number of fences that can be returned and as output it will specify
+	 * how many fences were actually returned from the ioctl.
+	 */
+	__u16	num_fences;
+	/**
+	 * @num_syncobj_handles: A count that represents the number of syncobj handles in
+	 * @syncobj_handles.
+	 */
+	__u32	num_syncobj_handles;
+	/**
+	 * @num_bo_read_handles: A count that represents the number of read BO handles in
+	 * @bo_read_handles.
+	 */
+	__u32	num_bo_read_handles;
+	/**
+	 * @num_bo_write_handles: A count that represents the number of write BO handles in
+	 * @bo_write_handles.
+	 */
+	__u32	num_bo_write_handles;
+	/**
+	 * @out_fences: The field is a return value from the ioctl containing the list of
+	 * address/value pairs to wait for.
+	 */
+	__u64	out_fences;
+};
+
+/* sem related */
+#define AMDGPU_SEM_OP_CREATE_SEM        1
+#define AMDGPU_SEM_OP_WAIT_SEM	        2
+#define AMDGPU_SEM_OP_SIGNAL_SEM        3
+#define AMDGPU_SEM_OP_DESTROY_SEM       4
+#define AMDGPU_SEM_OP_IMPORT_SEM	5
+#define AMDGPU_SEM_OP_EXPORT_SEM	6
+
+struct drm_amdgpu_sem_in {
+	/** AMDGPU_SEM_OP_* */
+	uint32_t	op;
+	uint32_t        handle;
+	uint32_t	ctx_id;
+	uint32_t        ip_type;
+	uint32_t        ip_instance;
+	uint32_t        ring;
+	uint64_t        seq;
+};
+
+union drm_amdgpu_sem_out {
+	int32_t         fd;
+	uint32_t	handle;
+};
+
+union drm_amdgpu_sem {
+	struct drm_amdgpu_sem_in in;
+	union drm_amdgpu_sem_out out;
+};
+
+/* vm ioctl */
+#define AMDGPU_VM_OP_RESERVE_VMID	1
+#define AMDGPU_VM_OP_UNRESERVE_VMID	2
+
+struct drm_amdgpu_vm_in {
+	/** AMDGPU_VM_OP_* */
+	__u32	op;
+	__u32	flags;
+};
+
+struct drm_amdgpu_vm_out {
+	/** For future use, no flags defined so far */
+	__u64	flags;
+};
+
+union drm_amdgpu_vm {
+	struct drm_amdgpu_vm_in in;
+	struct drm_amdgpu_vm_out out;
+};
+
+/* sched ioctl */
+#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE	1
+#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE	2
+
+struct drm_amdgpu_sched_in {
+	/* AMDGPU_SCHED_OP_* */
+	__u32	op;
+	__u32	fd;
+	/** AMDGPU_CTX_PRIORITY_* */
+	__s32	priority;
+	__u32   ctx_id;
+};
+
+union drm_amdgpu_sched {
+	struct drm_amdgpu_sched_in in;
+};
+
+/*
+ * This is not a reliable API: expect it to fail for any number of
+ * reasons, and have a fallback path that does not use userptr to
+ * perform any operation.
+ */
+#define AMDGPU_GEM_USERPTR_READONLY	(1 << 0)
+#define AMDGPU_GEM_USERPTR_ANONONLY	(1 << 1)
+#define AMDGPU_GEM_USERPTR_VALIDATE	(1 << 2)
+#define AMDGPU_GEM_USERPTR_REGISTER	(1 << 3)
+
+struct drm_amdgpu_gem_userptr {
+	__u64		addr;
+	__u64		size;
+	/* AMDGPU_GEM_USERPTR_* */
+	__u32		flags;
+	/* Resulting GEM handle */
+	__u32		handle;
+};
+
+#define AMDGPU_GEM_DGMA_IMPORT			0
+#define AMDGPU_GEM_DGMA_QUERY_PHYS_ADDR		1
+struct drm_amdgpu_gem_dgma {
+	__u64		addr;
+	__u64		size;
+	__u32		op;
+	__u32		handle;
+};
+
+/* SI-CI-VI: */
+/* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */
+#define AMDGPU_TILING_ARRAY_MODE_SHIFT			0
+#define AMDGPU_TILING_ARRAY_MODE_MASK			0xf
+#define AMDGPU_TILING_PIPE_CONFIG_SHIFT			4
+#define AMDGPU_TILING_PIPE_CONFIG_MASK			0x1f
+#define AMDGPU_TILING_TILE_SPLIT_SHIFT			9
+#define AMDGPU_TILING_TILE_SPLIT_MASK			0x7
+#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT		12
+#define AMDGPU_TILING_MICRO_TILE_MODE_MASK		0x7
+#define AMDGPU_TILING_BANK_WIDTH_SHIFT			15
+#define AMDGPU_TILING_BANK_WIDTH_MASK			0x3
+#define AMDGPU_TILING_BANK_HEIGHT_SHIFT			17
+#define AMDGPU_TILING_BANK_HEIGHT_MASK			0x3
+#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT		19
+#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK		0x3
+#define AMDGPU_TILING_NUM_BANKS_SHIFT			21
+#define AMDGPU_TILING_NUM_BANKS_MASK			0x3
+
+/* GFX9 - GFX11: */
+#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT		0
+#define AMDGPU_TILING_SWIZZLE_MODE_MASK			0x1f
+#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT		5
+#define AMDGPU_TILING_DCC_OFFSET_256B_MASK		0xFFFFFF
+#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT		29
+#define AMDGPU_TILING_DCC_PITCH_MAX_MASK		0x3FFF
+#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT		43
+#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK		0x1
+#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT	44
+#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK		0x1
+#define AMDGPU_TILING_SCANOUT_SHIFT			63
+#define AMDGPU_TILING_SCANOUT_MASK			0x1
+
+/* GFX12 and later: */
+#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT			0
+#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK			0x7
+/* These are DCC recompression settings for memory management: */
+#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT	3
+#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK	0x3 /* 0:64B, 1:128B, 2:256B */
+#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT		5
+#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK		0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */
+#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT		8
+#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK		0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */
+/* When clearing the buffer or moving it from VRAM to GTT, don't compress and set DCC metadata
+ * to uncompressed. Set when parts of an allocation bypass DCC and read raw data. */
+#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT	14
+#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK	0x1
+/* bit gap */
+#define AMDGPU_TILING_GFX12_SCANOUT_SHIFT			63
+#define AMDGPU_TILING_GFX12_SCANOUT_MASK			0x1
+
+/* Set/Get helpers for tiling flags. */
+#define AMDGPU_TILING_SET(field, value) \
+	(((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
+#define AMDGPU_TILING_GET(value, field) \
+	(((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
+
+#define AMDGPU_GEM_METADATA_OP_SET_METADATA                  1
+#define AMDGPU_GEM_METADATA_OP_GET_METADATA                  2
+
+/** The same structure is shared for input/output */
+struct drm_amdgpu_gem_metadata {
+	/** GEM Object handle */
+	__u32	handle;
+	/** Whether to get or set metadata (AMDGPU_GEM_METADATA_OP_*) */
+	__u32	op;
+	struct {
+		/** For future use, no flags defined so far */
+		__u64	flags;
+		/** family specific tiling info */
+		__u64	tiling_info;
+		__u32	data_size_bytes;
+		__u32	data[64];
+	} data;
+};
+
+struct drm_amdgpu_gem_mmap_in {
+	/** the GEM object handle */
+	__u32 handle;
+	__u32 _pad;
+};
+
+struct drm_amdgpu_gem_mmap_out {
+	/** mmap offset from the vma offset manager */
+	__u64 addr_ptr;
+};
+
+union drm_amdgpu_gem_mmap {
+	struct drm_amdgpu_gem_mmap_in   in;
+	struct drm_amdgpu_gem_mmap_out out;
+};
+
+struct drm_amdgpu_gem_wait_idle_in {
+	/** GEM object handle */
+	__u32 handle;
+	/** For future use, no flags defined so far */
+	__u32 flags;
+	/** Absolute timeout to wait */
+	__u64 timeout;
+};
+
+struct drm_amdgpu_gem_wait_idle_out {
+	/** BO status:  0 - BO is idle, 1 - BO is busy */
+	__u32 status;
+	/** Returned current memory domain */
+	__u32 domain;
+};
+
+union drm_amdgpu_gem_wait_idle {
+	struct drm_amdgpu_gem_wait_idle_in  in;
+	struct drm_amdgpu_gem_wait_idle_out out;
+};
+
+struct drm_amdgpu_wait_cs_in {
+	/* Command submission handle
+	 * handle equals 0 means none to wait for
+	 * handle equals ~0ull means wait for the latest sequence number
+	 */
+	__u64 handle;
+	/** Absolute timeout to wait */
+	__u64 timeout;
+	__u32 ip_type;
+	__u32 ip_instance;
+	__u32 ring;
+	__u32 ctx_id;
+};
+
+struct drm_amdgpu_wait_cs_out {
+	/** CS status:  0 - CS completed, 1 - CS still busy */
+	__u64 status;
+};
+
+union drm_amdgpu_wait_cs {
+	struct drm_amdgpu_wait_cs_in in;
+	struct drm_amdgpu_wait_cs_out out;
+};
+
+struct drm_amdgpu_fence {
+	__u32 ctx_id;
+	__u32 ip_type;
+	__u32 ip_instance;
+	__u32 ring;
+	__u64 seq_no;
+};
+
+struct drm_amdgpu_wait_fences_in {
+	/** This points to uint64_t * which points to fences */
+	__u64 fences;
+	__u32 fence_count;
+	__u32 wait_all;
+	__u64 timeout_ns;
+};
+
+struct drm_amdgpu_wait_fences_out {
+	__u32 status;
+	__u32 first_signaled;
+};
+
+union drm_amdgpu_wait_fences {
+	struct drm_amdgpu_wait_fences_in in;
+	struct drm_amdgpu_wait_fences_out out;
+};
+
+#define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO	0
+#define AMDGPU_GEM_OP_SET_PLACEMENT		1
+#define AMDGPU_GEM_OP_GET_MAPPING_INFO		2
+
+struct drm_amdgpu_gem_vm_entry {
+	/* Start of mapping (in bytes) */
+	__u64 addr;
+
+	/* Size of mapping (in bytes) */
+	__u64 size;
+
+	/* Mapping offset */
+	__u64 offset;
+
+	/* flags needed to recreate mapping */
+	__u64 flags;
+};
+
+/* Sets or returns a value associated with a buffer. */
+struct drm_amdgpu_gem_op {
+	/** GEM object handle */
+	__u32	handle;
+	/** AMDGPU_GEM_OP_* */
+	__u32	op;
+	/** Input or return value. For MAPPING_INFO op: pointer to array of struct drm_amdgpu_gem_vm_entry */
+	__u64	value;
+	/** For MAPPING_INFO op: number of mappings (in/out) */
+	__u32	num_entries;
+
+	__u32	padding;
+};
+
+#define AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT	(1 << 0)
+
+struct drm_amdgpu_gem_list_handles {
+	/* User pointer to array of struct drm_amdgpu_gem_list_handles_entry */
+	__u64   entries;
+
+	/* Size of entries buffer / Number of handles in process (if larger than size of buffer, must retry) */
+	__u32   num_entries;
+
+	__u32 padding;
+};
+
+struct drm_amdgpu_gem_list_handles_entry {
+	/* gem handle of buffer object */
+	__u32 gem_handle;
+
+	/* Currently just one flag: IS_IMPORT */
+	__u32 flags;
+
+	/* Size of bo */
+	__u64 size;
+
+	/* Preferred domains for GEM_CREATE */
+	__u64 preferred_domains;
+
+	/* GEM_CREATE flags for re-creation of buffer */
+	__u64 alloc_flags;
+
+	/* physical start_addr alignment in bytes for some HW requirements */
+	__u64 alignment;
+};
+
+#define AMDGPU_VA_OP_MAP			1
+#define AMDGPU_VA_OP_UNMAP			2
+#define AMDGPU_VA_OP_CLEAR			3
+#define AMDGPU_VA_OP_REPLACE			4
+
+/* Delay the page table update till the next CS */
+#define AMDGPU_VM_DELAY_UPDATE		(1 << 0)
+
+/* Mapping flags */
+/* readable mapping */
+#define AMDGPU_VM_PAGE_READABLE		(1 << 1)
+/* writable mapping */
+#define AMDGPU_VM_PAGE_WRITEABLE	(1 << 2)
+/* executable mapping, new for VI */
+#define AMDGPU_VM_PAGE_EXECUTABLE	(1 << 3)
+/* partially resident texture */
+#define AMDGPU_VM_PAGE_PRT		(1 << 4)
+/* MTYPE flags use bit 5 to 8 */
+#define AMDGPU_VM_MTYPE_MASK		(0xf << 5)
+/* Default MTYPE. Pre-AI must use this.  Recommended for newer ASICs. */
+#define AMDGPU_VM_MTYPE_DEFAULT		(0 << 5)
+/* Use Non Coherent MTYPE instead of default MTYPE */
+#define AMDGPU_VM_MTYPE_NC		(1 << 5)
+/* Use Write Combine MTYPE instead of default MTYPE */
+#define AMDGPU_VM_MTYPE_WC		(2 << 5)
+/* Use Cache Coherent MTYPE instead of default MTYPE */
+#define AMDGPU_VM_MTYPE_CC		(3 << 5)
+/* Use UnCached MTYPE instead of default MTYPE */
+#define AMDGPU_VM_MTYPE_UC		(4 << 5)
+/* Use Read Write MTYPE instead of default MTYPE */
+#define AMDGPU_VM_MTYPE_RW		(5 << 5)
+/* don't allocate MALL */
+#define AMDGPU_VM_PAGE_NOALLOC		(1 << 9)
+
+struct drm_amdgpu_gem_va {
+	/** GEM object handle */
+	__u32 handle;
+	__u32 _pad;
+	/** AMDGPU_VA_OP_* */
+	__u32 operation;
+	/** AMDGPU_VM_PAGE_* */
+	__u32 flags;
+	/** VA address to assign. Must be correctly aligned. */
+	__u64 va_address;
+	/** Specify offset inside of BO to assign. Must be correctly aligned.*/
+	__u64 offset_in_bo;
+	/** Specify mapping size. Must be correctly aligned. */
+	__u64 map_size;
+	/**
+	 * vm_timeline_point is a sequence number used to add new timeline point.
+	 */
+	__u64 vm_timeline_point;
+	/**
+	 * The vm page table update fence is installed in given vm_timeline_syncobj_out
+	 * at vm_timeline_point.
+	 */
+	__u32 vm_timeline_syncobj_out;
+	/** the number of syncobj handles in @input_fence_syncobj_handles */
+	__u32 num_syncobj_handles;
+	/** Array of sync object handle to wait for given input fences */
+	__u64 input_fence_syncobj_handles;
+};
+
+#define AMDGPU_HW_IP_GFX          0
+#define AMDGPU_HW_IP_COMPUTE      1
+#define AMDGPU_HW_IP_DMA          2
+#define AMDGPU_HW_IP_UVD          3
+#define AMDGPU_HW_IP_VCE          4
+#define AMDGPU_HW_IP_UVD_ENC      5
+#define AMDGPU_HW_IP_VCN_DEC      6
+/*
+ * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support
+ * both encoding and decoding jobs.
+ */
+#define AMDGPU_HW_IP_VCN_ENC      7
+#define AMDGPU_HW_IP_VCN_JPEG     8
+#define AMDGPU_HW_IP_VPE          9
+#define AMDGPU_HW_IP_NUM          10
+
+#define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1
+
+#define AMDGPU_CHUNK_ID_IB		0x01
+#define AMDGPU_CHUNK_ID_FENCE		0x02
+#define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
+#define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
+#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
+#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
+#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES	0x07
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT    0x08
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL  0x09
+#define AMDGPU_CHUNK_ID_CP_GFX_SHADOW   0x0a
+
+struct drm_amdgpu_cs_chunk {
+	__u32		chunk_id;
+	__u32		length_dw;
+	__u64		chunk_data;
+};
+
+struct drm_amdgpu_cs_in {
+	/** Rendering context id */
+	__u32		ctx_id;
+	/**  Handle of resource list associated with CS */
+	__u32		bo_list_handle;
+	__u32		num_chunks;
+	__u32		flags;
+	/** this points to __u64 * which point to cs chunks */
+	__u64		chunks;
+};
+
+struct drm_amdgpu_cs_out {
+	__u64 handle;
+};
+
+union drm_amdgpu_cs {
+	struct drm_amdgpu_cs_in in;
+	struct drm_amdgpu_cs_out out;
+};
+
+/* Specify flags to be used for IB */
+
+/* This IB should be submitted to CE */
+#define AMDGPU_IB_FLAG_CE	(1<<0)
+
+/* Preamble flag, which means the IB could be dropped if no context switch */
+#define AMDGPU_IB_FLAG_PREAMBLE (1<<1)
+
+/* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */
+#define AMDGPU_IB_FLAG_PREEMPT (1<<2)
+
+/* The IB fence should do the L2 writeback but not invalidate any shader
+ * caches (L2/vL1/sL1/I$). */
+#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
+
+/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.
+ * This will reset wave ID counters for the IB.
+ */
+#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
+
+/* Flag the IB as secure (TMZ)
+ */
+#define AMDGPU_IB_FLAGS_SECURE  (1 << 5)
+
+/* Tell KMD to flush and invalidate caches
+ */
+#define AMDGPU_IB_FLAG_EMIT_MEM_SYNC  (1 << 6)
+
+struct drm_amdgpu_cs_chunk_ib {
+	__u32 _pad;
+	/** AMDGPU_IB_FLAG_* */
+	__u32 flags;
+	/** Virtual address to begin IB execution */
+	__u64 va_start;
+	/** Size of submission */
+	__u32 ib_bytes;
+	/** HW IP to submit to */
+	__u32 ip_type;
+	/** HW IP index of the same type to submit to  */
+	__u32 ip_instance;
+	/** Ring index to submit to */
+	__u32 ring;
+};
+
+struct drm_amdgpu_cs_chunk_dep {
+	__u32 ip_type;
+	__u32 ip_instance;
+	__u32 ring;
+	__u32 ctx_id;
+	__u64 handle;
+};
+
+struct drm_amdgpu_cs_chunk_fence {
+	__u32 handle;
+	__u32 offset;
+};
+
+struct drm_amdgpu_cs_chunk_sem {
+	__u32 handle;
+};
+
+struct drm_amdgpu_cs_chunk_syncobj {
+       __u32 handle;
+       __u32 flags;
+       __u64 point;
+};
+
+#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ	0
+#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD	1
+#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD	2
+
+union drm_amdgpu_fence_to_handle {
+	struct {
+		struct drm_amdgpu_fence fence;
+		__u32 what;
+		__u32 pad;
+	} in;
+	struct {
+		__u32 handle;
+	} out;
+};
+
+struct drm_amdgpu_cs_chunk_data {
+	union {
+		struct drm_amdgpu_cs_chunk_ib		ib_data;
+		struct drm_amdgpu_cs_chunk_fence	fence_data;
+	};
+};
+
+#define AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW         0x1
+
+struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
+	__u64 shadow_va;
+	__u64 csa_va;
+	__u64 gds_va;
+	__u64 flags;
+};
+
+/*
+ *  Query h/w info: Flag that this is integrated (a.k.a. fusion) GPU
+ *
+ */
+#define AMDGPU_IDS_FLAGS_FUSION			0x01
+#define AMDGPU_IDS_FLAGS_PREEMPTION		0x02
+#define AMDGPU_IDS_FLAGS_TMZ			0x04
+#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD	0x08
+#define AMDGPU_IDS_FLAGS_GANG_SUBMIT		0x10
+
+/*
+ *  Query h/w info: Flag identifying VF/PF/PT mode
+ *
+ */
+#define AMDGPU_IDS_FLAGS_MODE_MASK      0x300
+#define AMDGPU_IDS_FLAGS_MODE_SHIFT     0x8
+#define AMDGPU_IDS_FLAGS_MODE_PF        0x0
+#define AMDGPU_IDS_FLAGS_MODE_VF        0x1
+#define AMDGPU_IDS_FLAGS_MODE_PT        0x2
+
+/* indicate if acceleration can be working */
+#define AMDGPU_INFO_ACCEL_WORKING		0x00
+/* get the crtc_id from the mode object id? */
+#define AMDGPU_INFO_CRTC_FROM_ID		0x01
+/* query hw IP info */
+#define AMDGPU_INFO_HW_IP_INFO			0x02
+/* query hw IP instance count for the specified type */
+#define AMDGPU_INFO_HW_IP_COUNT			0x03
+/* timestamp for GL_ARB_timer_query */
+#define AMDGPU_INFO_TIMESTAMP			0x05
+/* Query the firmware version */
+#define AMDGPU_INFO_FW_VERSION			0x0e
+	/* Subquery id: Query VCE firmware version */
+	#define AMDGPU_INFO_FW_VCE		0x1
+	/* Subquery id: Query UVD firmware version */
+	#define AMDGPU_INFO_FW_UVD		0x2
+	/* Subquery id: Query GMC firmware version */
+	#define AMDGPU_INFO_FW_GMC		0x03
+	/* Subquery id: Query GFX ME firmware version */
+	#define AMDGPU_INFO_FW_GFX_ME		0x04
+	/* Subquery id: Query GFX PFP firmware version */
+	#define AMDGPU_INFO_FW_GFX_PFP		0x05
+	/* Subquery id: Query GFX CE firmware version */
+	#define AMDGPU_INFO_FW_GFX_CE		0x06
+	/* Subquery id: Query GFX RLC firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLC		0x07
+	/* Subquery id: Query GFX MEC firmware version */
+	#define AMDGPU_INFO_FW_GFX_MEC		0x08
+	/* Subquery id: Query SMC firmware version */
+	#define AMDGPU_INFO_FW_SMC		0x0a
+	/* Subquery id: Query SDMA firmware version */
+	#define AMDGPU_INFO_FW_SDMA		0x0b
+	/* Subquery id: Query PSP SOS firmware version */
+	#define AMDGPU_INFO_FW_SOS		0x0c
+	/* Subquery id: Query PSP ASD firmware version */
+	#define AMDGPU_INFO_FW_ASD		0x0d
+	/* Subquery id: Query VCN firmware version */
+	#define AMDGPU_INFO_FW_VCN		0x0e
+	/* Subquery id: Query GFX RLC SRLC firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL 0x0f
+	/* Subquery id: Query GFX RLC SRLG firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM 0x10
+	/* Subquery id: Query GFX RLC SRLS firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11
+	/* Subquery id: Query DMCU firmware version */
+	#define AMDGPU_INFO_FW_DMCU		0x12
+	#define AMDGPU_INFO_FW_TA		0x13
+	/* Subquery id: Query DMCUB firmware version */
+	#define AMDGPU_INFO_FW_DMCUB		0x14
+	/* Subquery id: Query TOC firmware version */
+	#define AMDGPU_INFO_FW_TOC		0x15
+	/* Subquery id: Query CAP firmware version */
+	#define AMDGPU_INFO_FW_CAP		0x16
+	/* Subquery id: Query GFX RLCP firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLCP		0x17
+	/* Subquery id: Query GFX RLCV firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLCV		0x18
+	/* Subquery id: Query MES_KIQ firmware version */
+	#define AMDGPU_INFO_FW_MES_KIQ		0x19
+	/* Subquery id: Query MES firmware version */
+	#define AMDGPU_INFO_FW_MES		0x1a
+	/* Subquery id: Query IMU firmware version */
+	#define AMDGPU_INFO_FW_IMU		0x1b
+	/* Subquery id: Query VPE firmware version */
+	#define AMDGPU_INFO_FW_VPE		0x1c
+
+/* number of bytes moved for TTM migration */
+#define AMDGPU_INFO_NUM_BYTES_MOVED		0x0f
+/* the used VRAM size */
+#define AMDGPU_INFO_VRAM_USAGE			0x10
+/* the used GTT size */
+#define AMDGPU_INFO_GTT_USAGE			0x11
+/* Information about GDS, etc. resource configuration */
+#define AMDGPU_INFO_GDS_CONFIG			0x13
+/* Query information about VRAM and GTT domains */
+#define AMDGPU_INFO_VRAM_GTT			0x14
+/* Query information about register in MMR address space*/
+#define AMDGPU_INFO_READ_MMR_REG		0x15
+/* Query information about device: rev id, family, etc. */
+#define AMDGPU_INFO_DEV_INFO			0x16
+/* visible vram usage */
+#define AMDGPU_INFO_VIS_VRAM_USAGE		0x17
+/* number of TTM buffer evictions */
+#define AMDGPU_INFO_NUM_EVICTIONS		0x18
+/* Query memory information about VRAM and GTT domains */
+#define AMDGPU_INFO_MEMORY			0x19
+/* Query vce clock table */
+#define AMDGPU_INFO_VCE_CLOCK_TABLE		0x1A
+/* Query vbios related information */
+#define AMDGPU_INFO_VBIOS			0x1B
+	/* Subquery id: Query vbios size */
+	#define AMDGPU_INFO_VBIOS_SIZE		0x1
+	/* Subquery id: Query vbios image */
+	#define AMDGPU_INFO_VBIOS_IMAGE		0x2
+	/* Subquery id: Query vbios info */
+	#define AMDGPU_INFO_VBIOS_INFO		0x3
+/* Query UVD handles */
+#define AMDGPU_INFO_NUM_HANDLES			0x1C
+/* Query sensor related information */
+#define AMDGPU_INFO_SENSOR			0x1D
+	/* Subquery id: Query GPU shader clock */
+	#define AMDGPU_INFO_SENSOR_GFX_SCLK		0x1
+	/* Subquery id: Query GPU memory clock */
+	#define AMDGPU_INFO_SENSOR_GFX_MCLK		0x2
+	/* Subquery id: Query GPU temperature */
+	#define AMDGPU_INFO_SENSOR_GPU_TEMP		0x3
+	/* Subquery id: Query GPU load */
+	#define AMDGPU_INFO_SENSOR_GPU_LOAD		0x4
+	/* Subquery id: Query average GPU power	*/
+	#define AMDGPU_INFO_SENSOR_GPU_AVG_POWER	0x5
+	/* Subquery id: Query northbridge voltage */
+	#define AMDGPU_INFO_SENSOR_VDDNB		0x6
+	/* Subquery id: Query graphics voltage */
+	#define AMDGPU_INFO_SENSOR_VDDGFX		0x7
+	/* Subquery id: Query GPU stable pstate shader clock */
+	#define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK		0x8
+	/* Subquery id: Query GPU stable pstate memory clock */
+	#define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK		0x9
+	/* Subquery id: Query GPU peak pstate shader clock */
+	#define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK			0xa
+	/* Subquery id: Query GPU peak pstate memory clock */
+	#define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK			0xb
+	/* Subquery id: Query input GPU power	*/
+	#define AMDGPU_INFO_SENSOR_GPU_INPUT_POWER	0xc
+/* Number of VRAM page faults on CPU access. */
+#define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS	0x1E
+#define AMDGPU_INFO_VRAM_LOST_COUNTER		0x1F
+/* query ras mask of enabled features*/
+#define AMDGPU_INFO_RAS_ENABLED_FEATURES	0x20
+/* RAS MASK: UMC (VRAM) */
+#define AMDGPU_INFO_RAS_ENABLED_UMC			(1 << 0)
+/* RAS MASK: SDMA */
+#define AMDGPU_INFO_RAS_ENABLED_SDMA			(1 << 1)
+/* RAS MASK: GFX */
+#define AMDGPU_INFO_RAS_ENABLED_GFX			(1 << 2)
+/* RAS MASK: MMHUB */
+#define AMDGPU_INFO_RAS_ENABLED_MMHUB			(1 << 3)
+/* RAS MASK: ATHUB */
+#define AMDGPU_INFO_RAS_ENABLED_ATHUB			(1 << 4)
+/* RAS MASK: PCIE */
+#define AMDGPU_INFO_RAS_ENABLED_PCIE			(1 << 5)
+/* RAS MASK: HDP */
+#define AMDGPU_INFO_RAS_ENABLED_HDP			(1 << 6)
+/* RAS MASK: XGMI */
+#define AMDGPU_INFO_RAS_ENABLED_XGMI			(1 << 7)
+/* RAS MASK: DF */
+#define AMDGPU_INFO_RAS_ENABLED_DF			(1 << 8)
+/* RAS MASK: SMN */
+#define AMDGPU_INFO_RAS_ENABLED_SMN			(1 << 9)
+/* RAS MASK: SEM */
+#define AMDGPU_INFO_RAS_ENABLED_SEM			(1 << 10)
+/* RAS MASK: MP0 */
+#define AMDGPU_INFO_RAS_ENABLED_MP0			(1 << 11)
+/* RAS MASK: MP1 */
+#define AMDGPU_INFO_RAS_ENABLED_MP1			(1 << 12)
+/* RAS MASK: FUSE */
+#define AMDGPU_INFO_RAS_ENABLED_FUSE			(1 << 13)
+/* query video encode/decode caps */
+#define AMDGPU_INFO_VIDEO_CAPS			0x21
+	/* Subquery id: Decode */
+	#define AMDGPU_INFO_VIDEO_CAPS_DECODE		0
+	/* Subquery id: Encode */
+	#define AMDGPU_INFO_VIDEO_CAPS_ENCODE		1
+/* Query the max number of IBs per gang per submission */
+#define AMDGPU_INFO_MAX_IBS			0x22
+/* query last page fault info */
+#define AMDGPU_INFO_GPUVM_FAULT			0x23
+/* query FW object size and alignment */
+#define AMDGPU_INFO_UQ_FW_AREAS			0x24
+
+/* Hybrid Stack Specific Defs*/
+/* gpu capability */
+#define AMDGPU_INFO_CAPABILITY			0x50
+/* virtual range */
+#define AMDGPU_INFO_VIRTUAL_RANGE		0x51
+/* query pin memory capability */
+#define AMDGPU_CAPABILITY_PIN_MEM_FLAG  (1 << 0)
+/* query direct gma capability */
+#define AMDGPU_CAPABILITY_DIRECT_GMA_FLAG	(1 << 1)
+
+#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT	0
+#define AMDGPU_INFO_MMR_SE_INDEX_MASK	0xff
+#define AMDGPU_INFO_MMR_SH_INDEX_SHIFT	8
+#define AMDGPU_INFO_MMR_SH_INDEX_MASK	0xff
+
+struct drm_amdgpu_query_fw {
+	/** AMDGPU_INFO_FW_* */
+	__u32 fw_type;
+	/**
+	 * Index of the IP if there are more IPs of
+	 * the same type.
+	 */
+	__u32 ip_instance;
+	/**
+	 * Index of the engine. Whether this is used depends
+	 * on the firmware type. (e.g. MEC, SDMA)
+	 */
+	__u32 index;
+	__u32 _pad;
+};
+
+/* Input structure for the INFO ioctl */
+struct drm_amdgpu_info {
+	/* Where the return value will be stored */
+	__u64 return_pointer;
+	/* The size of the return value. Just like "size" in "snprintf",
+	 * it limits how many bytes the kernel can write. */
+	__u32 return_size;
+	/* The query request id. */
+	__u32 query;
+
+	union {
+		struct {
+			__u32 id;
+			__u32 _pad;
+		} mode_crtc;
+
+		struct {
+			/** AMDGPU_HW_IP_* */
+			__u32 type;
+			/**
+			 * Index of the IP if there are more IPs of the same
+			 * type. Ignored by AMDGPU_INFO_HW_IP_COUNT.
+			 */
+			__u32 ip_instance;
+		} query_hw_ip;
+
+		struct {
+			__u32 dword_offset;
+			/** number of registers to read */
+			__u32 count;
+			__u32 instance;
+			/** For future use, no flags defined so far */
+			__u32 flags;
+		} read_mmr_reg;
+
+		struct {
+			uint32_t aperture;
+			uint32_t _pad;
+		} virtual_range;
+
+		struct drm_amdgpu_query_fw query_fw;
+
+		struct {
+			__u32 type;
+			__u32 offset;
+		} vbios_info;
+
+		struct {
+			__u32 type;
+		} sensor_info;
+
+		struct {
+			__u32 type;
+		} video_cap;
+	};
+};
+
+struct drm_amdgpu_info_gds {
+	/** GDS GFX partition size */
+	__u32 gds_gfx_partition_size;
+	/** GDS compute partition size */
+	__u32 compute_partition_size;
+	/** total GDS memory size */
+	__u32 gds_total_size;
+	/** GWS size per GFX partition */
+	__u32 gws_per_gfx_partition;
+	/** GWS size per compute partition */
+	__u32 gws_per_compute_partition;
+	/** OA size per GFX partition */
+	__u32 oa_per_gfx_partition;
+	/** OA size per compute partition */
+	__u32 oa_per_compute_partition;
+	__u32 _pad;
+};
+
+struct drm_amdgpu_info_vram_gtt {
+	__u64 vram_size;
+	__u64 vram_cpu_accessible_size;
+	__u64 gtt_size;
+};
+
+struct drm_amdgpu_heap_info {
+	/** max. physical memory */
+	__u64 total_heap_size;
+
+	/** Theoretical max. available memory in the given heap */
+	__u64 usable_heap_size;
+
+	/**
+	 * Number of bytes allocated in the heap. This includes all processes
+	 * and private allocations in the kernel. It changes when new buffers
+	 * are allocated, freed, and moved. It cannot be larger than
+	 * heap_size.
+	 */
+	__u64 heap_usage;
+
+	/**
+	 * Theoretical possible max. size of buffer which
+	 * could be allocated in the given heap
+	 */
+	__u64 max_allocation;
+};
+
+struct drm_amdgpu_memory_info {
+	struct drm_amdgpu_heap_info vram;
+	struct drm_amdgpu_heap_info cpu_accessible_vram;
+	struct drm_amdgpu_heap_info gtt;
+};
+
+struct drm_amdgpu_info_firmware {
+	__u32 ver;
+	__u32 feature;
+};
+
+struct drm_amdgpu_info_vbios {
+	__u8 name[64];
+	__u8 vbios_pn[64];
+	__u32 version;
+	__u32 pad;
+	__u8 vbios_ver_str[32];
+	__u8 date[32];
+};
+
+#define AMDGPU_VRAM_TYPE_UNKNOWN 0
+#define AMDGPU_VRAM_TYPE_GDDR1 1
+#define AMDGPU_VRAM_TYPE_DDR2  2
+#define AMDGPU_VRAM_TYPE_GDDR3 3
+#define AMDGPU_VRAM_TYPE_GDDR4 4
+#define AMDGPU_VRAM_TYPE_GDDR5 5
+#define AMDGPU_VRAM_TYPE_HBM   6
+#define AMDGPU_VRAM_TYPE_DDR3  7
+#define AMDGPU_VRAM_TYPE_DDR4  8
+#define AMDGPU_VRAM_TYPE_GDDR6 9
+#define AMDGPU_VRAM_TYPE_DDR5  10
+#define AMDGPU_VRAM_TYPE_LPDDR4 11
+#define AMDGPU_VRAM_TYPE_LPDDR5 12
+#define AMDGPU_VRAM_TYPE_HBM3E 13
+
+#define AMDGPU_VRAM_TYPE_HBM_WIDTH 4096
+
+struct drm_amdgpu_info_device {
+	/** PCI Device ID */
+	__u32 device_id;
+	/** Internal chip revision (A0, A1, etc.) */
+	__u32 chip_rev;
+	__u32 external_rev;
+	/** Revision id in PCI Config space */
+	__u32 pci_rev;
+	__u32 family;
+	__u32 num_shader_engines;
+	__u32 num_shader_arrays_per_engine;
+	/* in KHz */
+	__u32 gpu_counter_freq;
+	__u64 max_engine_clock;
+	__u64 max_memory_clock;
+	/* cu information */
+	__u32 cu_active_number;
+	/* NOTE: cu_ao_mask is INVALID, DON'T use it */
+	__u32 cu_ao_mask;
+	__u32 cu_bitmap[4][4];
+	/** Render backend pipe mask. One render backend is CB+DB. */
+	__u32 enabled_rb_pipes_mask;
+	__u32 num_rb_pipes;
+	__u32 num_hw_gfx_contexts;
+	/* PCIe version (the smaller of the GPU and the CPU/motherboard) */
+	__u32 pcie_gen;
+	__u64 ids_flags;
+	/** Starting virtual address for UMDs. */
+	__u64 virtual_address_offset;
+	/** The maximum virtual address */
+	__u64 virtual_address_max;
+	/** Required alignment of virtual addresses. */
+	__u32 virtual_address_alignment;
+	/** Page table entry - fragment size */
+	__u32 pte_fragment_size;
+	__u32 gart_page_size;
+	/** constant engine ram size*/
+	__u32 ce_ram_size;
+	/** video memory type info*/
+	__u32 vram_type;
+	/** video memory bit width*/
+	__u32 vram_bit_width;
+	/* vce harvesting instance */
+	__u32 vce_harvest_config;
+	/* gfx double offchip LDS buffers */
+	__u32 gc_double_offchip_lds_buf;
+	/* NGG Primitive Buffer */
+	__u64 prim_buf_gpu_addr;
+	/* NGG Position Buffer */
+	__u64 pos_buf_gpu_addr;
+	/* NGG Control Sideband */
+	__u64 cntl_sb_buf_gpu_addr;
+	/* NGG Parameter Cache */
+	__u64 param_buf_gpu_addr;
+	__u32 prim_buf_size;
+	__u32 pos_buf_size;
+	__u32 cntl_sb_buf_size;
+	__u32 param_buf_size;
+	/* wavefront size*/
+	__u32 wave_front_size;
+	/* shader visible vgprs*/
+	__u32 num_shader_visible_vgprs;
+	/* CU per shader array*/
+	__u32 num_cu_per_sh;
+	/* number of tcc blocks*/
+	__u32 num_tcc_blocks;
+	/* gs vgt table depth*/
+	__u32 gs_vgt_table_depth;
+	/* gs primitive buffer depth*/
+	__u32 gs_prim_buffer_depth;
+	/* max gs wavefront per vgt*/
+	__u32 max_gs_waves_per_vgt;
+	/* PCIe number of lanes (the smaller of the GPU and the CPU/motherboard) */
+	__u32 pcie_num_lanes;
+	/* always on cu bitmap */
+	__u32 cu_ao_bitmap[4][4];
+	/** Starting high virtual address for UMDs. */
+	__u64 high_va_offset;
+	/** The maximum high virtual address */
+	__u64 high_va_max;
+	/* gfx10 pa_sc_tile_steering_override */
+	__u32 pa_sc_tile_steering_override;
+	/* disabled TCCs */
+	__u64 tcc_disabled_mask;
+	__u64 min_engine_clock;
+	__u64 min_memory_clock;
+	/* The following fields are only set on gfx11+, older chips set 0. */
+	__u32 tcp_cache_size;       /* AKA GL0, VMEM cache */
+	__u32 num_sqc_per_wgp;
+	__u32 sqc_data_cache_size;  /* AKA SMEM cache */
+	__u32 sqc_inst_cache_size;
+	__u32 gl1c_cache_size;
+	__u32 gl2c_cache_size;
+	__u64 mall_size;            /* AKA infinity cache */
+	/* high 32 bits of the rb pipes mask */
+	__u32 enabled_rb_pipes_mask_hi;
+	/* shadow area size for gfx11 */
+	__u32 shadow_size;
+	/* shadow area base virtual alignment for gfx11 */
+	__u32 shadow_alignment;
+	/* context save area size for gfx11 */
+	__u32 csa_size;
+	/* context save area base virtual alignment for gfx11 */
+	__u32 csa_alignment;
+	/* Userq IP mask (1 << AMDGPU_HW_IP_*) */
+	__u32 userq_ip_mask;
+	__u32 pad;
+};
+
+struct drm_amdgpu_info_hw_ip {
+	/** Version of h/w IP */
+	__u32  hw_ip_version_major;
+	__u32  hw_ip_version_minor;
+	/** Capabilities */
+	__u64  capabilities_flags;
+	/** command buffer address start alignment*/
+	__u32  ib_start_alignment;
+	/** command buffer size alignment*/
+	__u32  ib_size_alignment;
+	/** Bitmask of available rings. Bit 0 means ring 0, etc. */
+	__u32  available_rings;
+	/** version info: bits 23:16 major, 15:8 minor, 7:0 revision */
+	__u32  ip_discovery_version;
+	/* Userq available slots */
+	__u32  userq_num_slots;
+};
+
+/* GFX metadata BO sizes and alignment info (in bytes) */
+struct drm_amdgpu_info_uq_fw_areas_gfx {
+	/* shadow area size */
+	__u32 shadow_size;
+	/* shadow area base virtual mem alignment */
+	__u32 shadow_alignment;
+	/* context save area size */
+	__u32 csa_size;
+	/* context save area base virtual mem alignment */
+	__u32 csa_alignment;
+};
+
+/* IP specific fw related information used in the
+ * subquery AMDGPU_INFO_UQ_FW_AREAS
+ */
+struct drm_amdgpu_info_uq_fw_areas {
+	union {
+		struct drm_amdgpu_info_uq_fw_areas_gfx gfx;
+	};
+};
+
+struct drm_amdgpu_info_num_handles {
+	/** Max handles as supported by firmware for UVD */
+	__u32  uvd_max_handles;
+	/** Handles currently in use for UVD */
+	__u32  uvd_used_handles;
+};
+
+#define AMDGPU_VCE_CLOCK_TABLE_ENTRIES		6
+
+struct drm_amdgpu_info_vce_clock_table_entry {
+	/** System clock */
+	__u32 sclk;
+	/** Memory clock */
+	__u32 mclk;
+	/** VCE clock */
+	__u32 eclk;
+	__u32 pad;
+};
+
+struct drm_amdgpu_info_vce_clock_table {
+	struct drm_amdgpu_info_vce_clock_table_entry entries[AMDGPU_VCE_CLOCK_TABLE_ENTRIES];
+	__u32 num_valid_entries;
+	__u32 pad;
+};
+
+/* query video encode/decode caps */
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2			0
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4			1
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1			2
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC		3
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC			4
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG			5
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9			6
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1			7
+#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT			8
+
+struct drm_amdgpu_info_video_codec_info {
+	__u32 valid;
+	__u32 max_width;
+	__u32 max_height;
+	__u32 max_pixels_per_frame;
+	__u32 max_level;
+	__u32 pad;
+};
+
+struct drm_amdgpu_info_video_caps {
+	struct drm_amdgpu_info_video_codec_info codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT];
+};
+
+#define AMDGPU_VMHUB_TYPE_MASK			0xff
+#define AMDGPU_VMHUB_TYPE_SHIFT			0
+#define AMDGPU_VMHUB_TYPE_GFX			0
+#define AMDGPU_VMHUB_TYPE_MM0			1
+#define AMDGPU_VMHUB_TYPE_MM1			2
+#define AMDGPU_VMHUB_IDX_MASK			0xff00
+#define AMDGPU_VMHUB_IDX_SHIFT			8
+
+struct drm_amdgpu_info_gpuvm_fault {
+	__u64 addr;
+	__u32 status;
+	__u32 vmhub;
+};
+
+struct drm_amdgpu_info_uq_metadata_gfx {
+	/* shadow area size for gfx11 */
+	__u32 shadow_size;
+	/* shadow area base virtual alignment for gfx11 */
+	__u32 shadow_alignment;
+	/* context save area size for gfx11 */
+	__u32 csa_size;
+	/* context save area base virtual alignment for gfx11 */
+	__u32 csa_alignment;
+};
+
+struct drm_amdgpu_info_uq_metadata {
+	union {
+		struct drm_amdgpu_info_uq_metadata_gfx gfx;
+	};
+};
+
+/*
+ * Supported GPU families
+ */
+#define AMDGPU_FAMILY_UNKNOWN			0
+#define AMDGPU_FAMILY_SI			110 /* Hainan, Oland, Verde, Pitcairn, Tahiti */
+#define AMDGPU_FAMILY_CI			120 /* Bonaire, Hawaii */
+#define AMDGPU_FAMILY_KV			125 /* Kaveri, Kabini, Mullins */
+#define AMDGPU_FAMILY_VI			130 /* Iceland, Tonga */
+#define AMDGPU_FAMILY_CZ			135 /* Carrizo, Stoney */
+#define AMDGPU_FAMILY_AI			141 /* Vega10 */
+#define AMDGPU_FAMILY_RV			142 /* Raven */
+#define AMDGPU_FAMILY_NV			143 /* Navi10 */
+#define AMDGPU_FAMILY_VGH			144 /* Van Gogh */
+#define AMDGPU_FAMILY_GC_11_0_0			145 /* GC 11.0.0 */
+#define AMDGPU_FAMILY_YC			146 /* Yellow Carp */
+#define AMDGPU_FAMILY_GC_11_0_1			148 /* GC 11.0.1 */
+#define AMDGPU_FAMILY_GC_10_3_6			149 /* GC 10.3.6 */
+#define AMDGPU_FAMILY_GC_10_3_7			151 /* GC 10.3.7 */
+#define AMDGPU_FAMILY_GC_11_5_0			150 /* GC 11.5.0 */
+#define AMDGPU_FAMILY_GC_12_0_0			152 /* GC 12.0.0 */
+
+#ifndef HAVE_DRM_COLOR_CTM_3X4
+/* FIXME wrong namespace! */
+struct drm_color_ctm_3x4 {
+	/*
+	 * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude
+	 * (not two's complement!) format.
+	 */
+	__u64 matrix[12];
+};
+#endif
+
+/**
+ *  Definition of System Unified Address (SUA) apertures
+ */
+#define AMDGPU_SUA_APERTURE_PRIVATE    1
+#define AMDGPU_SUA_APERTURE_SHARED     2
+struct drm_amdgpu_virtual_range {
+	uint64_t start;
+	uint64_t end;
+};
+
+struct drm_amdgpu_capability {
+	__u32 flag;
+	__u32 direct_gma_size;
+};
+
+/*
+ * Definition of free sync enter and exit signals
+ * We may have more options in the future
+ */
+#define AMDGPU_FREESYNC_FULLSCREEN_ENTER                1
+#define AMDGPU_FREESYNC_FULLSCREEN_EXIT                 2
+
+struct drm_amdgpu_freesync {
+        __u32 op;                       /* AMDGPU_FREESYNC_FULLSCREEN_ENTER or */
+                                        /* AMDGPU_FREESYNC_FULLSCREEN_EXIT */
+        __u32 spare[7];
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c
index 0a55e34a2..ee55bde0a 100644
--- a/plugins/amdgpu/amdgpu_plugin.c
+++ b/plugins/amdgpu/amdgpu_plugin.c
@@ -12,72 +12,42 @@
 #include <sys/sysmacros.h>
 #include <sys/mman.h>
 #include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
 #include <stdint.h>
 #include <pthread.h>
 #include <semaphore.h>
 
 #include <xf86drm.h>
 #include <libdrm/amdgpu.h>
-#include <libdrm/amdgpu_drm.h>
 
 #include "criu-plugin.h"
 #include "plugin.h"
 #include "criu-amdgpu.pb-c.h"
+#include "util.h"
+#include "util-pie.h"
+#include "fdstore.h"
 
 #include "kfd_ioctl.h"
 #include "xmalloc.h"
 #include "criu-log.h"
 #include "files.h"
+#include "pstree.h"
+#include "sockets.h"
+#include "rst-malloc.h"
 
 #include "common/list.h"
+#include "amdgpu_drm.h"
+#include "amdgpu_plugin_dmabuf.h"
+#include "amdgpu_plugin_drm.h"
+#include "amdgpu_plugin_util.h"
 #include "amdgpu_plugin_topology.h"
+#include "amdgpu_socket_utils.h"
 
 #include "img-streamer.h"
 #include "image.h"
 #include "cr_options.h"
-
-#define AMDGPU_KFD_DEVICE "/dev/kfd"
-#define PROCPIDMEM	  "/proc/%d/mem"
-#define HSAKMT_SHM_PATH	  "/dev/shm/hsakmt_shared_mem"
-#define HSAKMT_SHM	  "/hsakmt_shared_mem"
-#define HSAKMT_SEM_PATH	  "/dev/shm/sem.hsakmt_semaphore"
-#define HSAKMT_SEM	  "hsakmt_semaphore"
-
-#define KFD_IOCTL_MAJOR_VERSION	    1
-#define MIN_KFD_IOCTL_MINOR_VERSION 8
-
-#define IMG_KFD_FILE	 "amdgpu-kfd-%d.img"
-#define IMG_RENDERD_FILE "amdgpu-renderD-%d.img"
-#define IMG_PAGES_FILE	 "amdgpu-pages-%d-%04x.img"
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE 1
-#endif
-
-#ifdef LOG_PREFIX
-#undef LOG_PREFIX
-#endif
-#define LOG_PREFIX "amdgpu_plugin: "
-
-#ifdef DEBUG
-#define plugin_log_msg(fmt, ...) pr_debug(fmt, ##__VA_ARGS__)
-#else
-#define plugin_log_msg(fmt, ...) \
-	{                        \
-	}
-#endif
-
-#define SDMA_PACKET(op, sub_op, e) ((((e)&0xFFFF) << 16) | (((sub_op)&0xFF) << 8) | (((op)&0xFF) << 0))
-
-#define SDMA_OPCODE_COPY	    1
-#define SDMA_COPY_SUB_OPCODE_LINEAR 0
-#define SDMA_NOP		    0
-#define SDMA_LINEAR_COPY_MAX_SIZE   (1ULL << 21)
-
-enum sdma_op_type {
-	SDMA_OP_VRAM_READ,
-	SDMA_OP_VRAM_WRITE,
-};
+#include "util.h"
 
 struct vma_metadata {
 	struct list_head list;
@@ -89,140 +59,42 @@ struct vma_metadata {
 };
 
 /************************************ Global Variables ********************************************/
-struct tp_system src_topology;
-struct tp_system dest_topology;
-
-struct device_maps checkpoint_maps;
-struct device_maps restore_maps;
-
-extern int fd_next;
 
 static LIST_HEAD(update_vma_info_list);
 
-extern bool kfd_fw_version_check;
-extern bool kfd_sdma_fw_version_check;
-extern bool kfd_caches_count_check;
-extern bool kfd_num_gws_check;
-extern bool kfd_vram_size_check;
-extern bool kfd_numa_check;
-extern bool kfd_capability_check;
+size_t kfd_max_buffer_size;
 
+bool plugin_added_to_inventory = false;
+
+bool plugin_disabled = false;
+
+struct handle_id {
+	int handle;
+	int fdstore_id;
+};
+struct shared_handle_ids {
+	int num_handles;
+	struct handle_id *handles;
+};
+struct shared_handle_ids *shared_memory = NULL;
+
+static mutex_t *shared_memory_mutex;
+
+int current_pid;
+/*
+ * In the case of a single process (common case), this optimization can effectively
+ * reduce the restore latency with parallel restore. In the case of multiple processes,
+ * states are already restored in parallel within different processes. Therefore, this
+ * optimization does not introduce further improvement and will be disabled by default
+ * in this case. The flag, parallel_disabled, is used to control whether the
+ * optimization is enabled or disabled.
+ */
+bool parallel_disabled = false;
+
+pthread_t parallel_thread = 0;
+int parallel_thread_result = 0;
 /**************************************************************************************************/
 
-int write_fp(FILE *fp, const void *buf, const size_t buf_len)
-{
-	size_t len_write;
-
-	len_write = fwrite(buf, 1, buf_len, fp);
-	if (len_write != buf_len) {
-		pr_perror("Unable to write file (wrote:%ld buf_len:%ld)", len_write, buf_len);
-		return -EIO;
-	}
-	return 0;
-}
-
-int read_fp(FILE *fp, void *buf, const size_t buf_len)
-{
-	size_t len_read;
-
-	len_read = fread(buf, 1, buf_len, fp);
-	if (len_read != buf_len) {
-		pr_perror("Unable to read file (read:%ld buf_len:%ld)", len_read, buf_len);
-		return -EIO;
-	}
-	return 0;
-}
-
-/**
- * @brief Open an image file
- *
- * We store the size of the actual contents in the first 8-bytes of the file. This allows us to
- * determine the file size when using criu_image_streamer when fseek and fstat are not available.
- * The FILE * returned is already at the location of the first actual contents.
- *
- * @param path The file path
- * @param write False for read, true for write
- * @param size Size of actual contents
- * @return FILE *if successful, NULL if failed
- */
-FILE *open_img_file(char *path, bool write, size_t *size)
-{
-	FILE *fp = NULL;
-	int fd, ret;
-
-	if (opts.stream)
-		fd = img_streamer_open(path, write ? O_DUMP : O_RSTR);
-	else
-		fd = openat(criu_get_image_dir(), path, write ? (O_WRONLY | O_CREAT) : O_RDONLY, 0600);
-
-	if (fd < 0) {
-		pr_perror("%s: Failed to open for %s", path, write ? "write" : "read");
-		return NULL;
-	}
-
-	fp = fdopen(fd, write ? "w" : "r");
-	if (!fp) {
-		pr_perror("%s: Failed get pointer for %s", path, write ? "write" : "read");
-		return NULL;
-	}
-
-	if (write)
-		ret = write_fp(fp, size, sizeof(*size));
-	else
-		ret = read_fp(fp, size, sizeof(*size));
-
-	if (ret) {
-		pr_perror("%s:Failed to access file size", path);
-		fclose(fp);
-		return NULL;
-	}
-
-	pr_debug("%s:Opened file for %s with size:%ld\n", path, write ? "write" : "read", *size);
-	return fp;
-}
-
-/**
- * @brief Write an image file
- *
- * We store the size of the actual contents in the first 8-bytes of the file. This allows us to
- * determine the file size when using criu_image_streamer when fseek and fstat are not available.
- *
- * @param path The file path
- * @param buf pointer to data to be written
- * @param buf_len size of buf
- * @return 0 if successful. -errno on failure
- */
-int write_img_file(char *path, const void *buf, const size_t buf_len)
-{
-	int ret;
-	FILE *fp;
-	size_t len = buf_len;
-
-	fp = open_img_file(path, true, &len);
-	if (!fp)
-		return -errno;
-
-	ret = write_fp(fp, buf, buf_len);
-	fclose(fp); /* this will also close fd */
-	return ret;
-}
-
-int read_file(const char *file_path, void *buf, const size_t buf_len)
-{
-	int ret;
-	FILE *fp;
-
-	fp = fopen(file_path, "r");
-	if (!fp) {
-		pr_perror("Cannot fopen %s", file_path);
-		return -errno;
-	}
-
-	ret = read_fp(fp, buf, buf_len);
-	fclose(fp); /* this will also close fd */
-	return ret;
-}
-
 /* Call ioctl, restarting if it is interrupted */
 int kmtIoctl(int fd, unsigned long request, void *arg)
 {
@@ -260,21 +132,21 @@ static void free_e(CriuKfd *e)
 
 static int allocate_device_entries(CriuKfd *e, int num_of_devices)
 {
-	e->device_entries = xmalloc(sizeof(DeviceEntry *) * num_of_devices);
+	e->device_entries = xmalloc(sizeof(KfdDeviceEntry *) * num_of_devices);
 	if (!e->device_entries) {
 		pr_err("Failed to allocate device_entries\n");
 		return -ENOMEM;
 	}
 
 	for (int i = 0; i < num_of_devices; i++) {
-		DeviceEntry *entry = xzalloc(sizeof(*entry));
+		KfdDeviceEntry *entry = xzalloc(sizeof(*entry));
 
 		if (!entry) {
 			pr_err("Failed to allocate entry\n");
 			return -ENOMEM;
 		}
 
-		device_entry__init(entry);
+		kfd_device_entry__init(entry);
 
 		e->device_entries[i] = entry;
 		e->n_device_entries++;
@@ -284,21 +156,21 @@ static int allocate_device_entries(CriuKfd *e, int num_of_devices)
 
 static int allocate_bo_entries(CriuKfd *e, int num_bos, struct kfd_criu_bo_bucket *bo_bucket_ptr)
 {
-	e->bo_entries = xmalloc(sizeof(BoEntry *) * num_bos);
+	e->bo_entries = xmalloc(sizeof(KfdBoEntry *) * num_bos);
 	if (!e->bo_entries) {
 		pr_err("Failed to allocate bo_info\n");
 		return -ENOMEM;
 	}
 
 	for (int i = 0; i < num_bos; i++) {
-		BoEntry *entry = xzalloc(sizeof(*entry));
+		KfdBoEntry *entry = xzalloc(sizeof(*entry));
 
 		if (!entry) {
 			pr_err("Failed to allocate botest\n");
 			return -ENOMEM;
 		}
 
-		bo_entry__init(entry);
+		kfd_bo_entry__init(entry);
 
 		e->bo_entries[i] = entry;
 		e->n_bo_entries++;
@@ -306,13 +178,13 @@ static int allocate_bo_entries(CriuKfd *e, int num_bos, struct kfd_criu_bo_bucke
 	return 0;
 }
 
-int topology_to_devinfo(struct tp_system *sys, struct device_maps *maps, DeviceEntry **deviceEntries)
+int topology_to_devinfo(struct tp_system *sys, struct device_maps *maps, KfdDeviceEntry **deviceEntries)
 {
 	uint32_t devinfo_index = 0;
 	struct tp_node *node;
 
 	list_for_each_entry(node, &sys->nodes, listm_system) {
-		DeviceEntry *devinfo = deviceEntries[devinfo_index++];
+		KfdDeviceEntry *devinfo = deviceEntries[devinfo_index++];
 
 		devinfo->node_id = node->id;
 
@@ -380,11 +252,11 @@ int topology_to_devinfo(struct tp_system *sys, struct device_maps *maps, DeviceE
 	return 0;
 }
 
-int devinfo_to_topology(DeviceEntry *devinfos[], uint32_t num_devices, struct tp_system *sys)
+int devinfo_to_topology(KfdDeviceEntry *devinfos[], uint32_t num_devices, struct tp_system *sys)
 {
 	for (int i = 0; i < num_devices; i++) {
 		struct tp_node *node;
-		DeviceEntry *devinfo = devinfos[i];
+		KfdDeviceEntry *devinfo = devinfos[i];
 
 		node = sys_add_node(sys, devinfo->node_id, devinfo->gpu_id);
 		if (!node)
@@ -449,9 +321,56 @@ void getenv_bool(const char *var, bool *value)
 	pr_info("param: %s:%s\n", var, *value ? "Y" : "N");
 }
 
+/* Parse $var as a byte count with an optional K/k, M or G suffix into *value; *value is kept if unset or invalid */
+void getenv_size_t(const char *var, size_t *value)
+{
+	char *value_str = getenv(var), *endp = value_str;
+	int sh = 0;
+	size_t size;
+
+	if (value_str) {
+		errno = 0; /* strtoul() never clears errno, so a stale error would reject a valid value */
+		size = (size_t)strtoul(value_str, &endp, 0);
+		if (errno || value_str == endp) {
+			pr_err("Ignoring invalid value for %s=%s, expecting a positive integer\n", var, value_str);
+			return;
+		}
+		switch (*endp) {
+		case 'k':
+		case 'K':
+			sh = 10;
+			break;
+		case 'M':
+			sh = 20;
+			break;
+		case 'G':
+			sh = 30;
+			break;
+		case '\0': /* no suffix: plain byte count, sh stays 0 */
+			break;
+		default:
+			pr_err("Ignoring invalid size suffix for %s=%s, expecting 'K'/'k', 'M', or 'G'\n", var, value_str);
+			return;
+		}
+		if (SIZE_MAX >> sh < size) {
+			pr_err("Ignoring invalid value for %s=%s, exceeds SIZE_MAX\n", var, value_str);
+			return;
+		}
+		*value = size << sh;
+	}
+	pr_info("param: %s:0x%zx\n", var, *value);
+}
+
 int amdgpu_plugin_init(int stage)
 {
-	pr_info("amdgpu_plugin: initialized:  %s (AMDGPU/KFD)\n", CR_PLUGIN_DESC.name);
+	if (stage == CR_PLUGIN_STAGE__RESTORE) {
+		if (!check_and_remove_inventory_plugin(CR_PLUGIN_DESC.name, strlen(CR_PLUGIN_DESC.name))) {
+			plugin_disabled = true;
+			return 0;
+		}
+	}
+
+	pr_info("initialized:  %s (AMDGPU/KFD)\n", CR_PLUGIN_DESC.name);
 
 	topology_init(&src_topology);
 	topology_init(&dest_topology);
@@ -459,6 +378,15 @@ int amdgpu_plugin_init(int stage)
 	maps_init(&restore_maps);
 
 	if (stage == CR_PLUGIN_STAGE__RESTORE) {
+		if (has_children(root_item)) {
+			pr_info("Parallel restore disabled\n");
+			parallel_disabled = true;
+		} else {
+			if (install_parallel_sock() < 0) {
+				pr_err("Failed to install parallel socket\n");
+				return -1;
+			}
+		}
 		/* Default Values */
 		kfd_fw_version_check = true;
 		kfd_sdma_fw_version_check = true;
@@ -476,12 +404,18 @@ int amdgpu_plugin_init(int stage)
 		getenv_bool("KFD_NUMA_CHECK", &kfd_numa_check);
 		getenv_bool("KFD_CAPABILITY_CHECK", &kfd_capability_check);
 	}
+	kfd_max_buffer_size = 0;
+	getenv_size_t("KFD_MAX_BUFFER_SIZE", &kfd_max_buffer_size);
+
 	return 0;
 }
 
 void amdgpu_plugin_fini(int stage, int ret)
 {
-	pr_info("amdgpu_plugin: finished  %s (AMDGPU/KFD)\n", CR_PLUGIN_DESC.name);
+	if (plugin_disabled)
+		return;
+
+	pr_info("finished  %s (AMDGPU/KFD)\n", CR_PLUGIN_DESC.name);
 
 	if (stage == CR_PLUGIN_STAGE__RESTORE)
 		sys_close_drm_render_devices(&dest_topology);
@@ -501,7 +435,7 @@ struct thread_data {
 	uint32_t gpu_id;
 	pid_t pid;
 	struct kfd_criu_bo_bucket *bo_buckets;
-	BoEntry **bo_entries;
+	KfdBoEntry **bo_entries;
 	int drm_fd;
 	int ret;
 	int id; /* File ID used by CRIU to identify KFD image for this process */
@@ -509,38 +443,36 @@ struct thread_data {
 
 int amdgpu_plugin_handle_device_vma(int fd, const struct stat *st_buf)
 {
-	struct stat st_kfd, st_dri_min;
-	char img_path[128];
+	struct stat st_kfd;
 	int ret = 0;
 
-	pr_debug("amdgpu_plugin: Enter %s\n", __func__);
+	pr_debug("Enter %s\n", __func__);
 	ret = stat(AMDGPU_KFD_DEVICE, &st_kfd);
 	if (ret == -1) {
 		pr_perror("stat error for /dev/kfd");
 		return ret;
 	}
 
-	snprintf(img_path, sizeof(img_path), "/dev/dri/renderD%d", DRM_FIRST_RENDER_NODE);
-
-	ret = stat(img_path, &st_dri_min);
-	if (ret == -1) {
-		pr_perror("stat error for %s", img_path);
-		return ret;
-	}
-
-	if (major(st_buf->st_rdev) == major(st_kfd.st_rdev) || ((major(st_buf->st_rdev) == major(st_dri_min.st_rdev)) &&
-								(minor(st_buf->st_rdev) >= minor(st_dri_min.st_rdev) &&
-								 minor(st_buf->st_rdev) >= DRM_FIRST_RENDER_NODE))) {
+	/* If input device is KFD return device as supported */
+	if (major(st_buf->st_rdev) == major(st_kfd.st_rdev)) {
 		pr_debug("Known non-regular mapping, kfd-renderD%d -> OK\n", minor(st_buf->st_rdev));
-		pr_debug("AMD KFD(maj) = %d, DRI(maj,min) = %d:%d VMA Device fd(maj,min) = %d:%d\n",
-			 major(st_kfd.st_rdev), major(st_dri_min.st_rdev), minor(st_dri_min.st_rdev),
-			 major(st_buf->st_rdev), minor(st_buf->st_rdev));
-		/* VMA belongs to kfd */
 		return 0;
 	}
 
-	pr_perror("amdgpu_plugin: Can't handle the VMA mapping");
-	return -ENOTSUP;
+	/* Determine if input is a DRM device and therefore is supported */
+	ret = amdgpu_plugin_drm_handle_device_vma(fd, st_buf);
+	if (ret)
+		pr_perror("%s(), Can't handle VMAs of input device", __func__);
+
+	if (!ret && !plugin_added_to_inventory) {
+		ret = add_inventory_plugin(CR_PLUGIN_DESC.name);
+		if (ret)
+			pr_err("Failed to add AMDGPU plugin to inventory image\n");
+		else
+			plugin_added_to_inventory = true;
+	}
+
+	return ret;
 }
 CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__HANDLE_DEVICE_VMA, amdgpu_plugin_handle_device_vma)
 
@@ -607,16 +539,15 @@ void free_and_unmap(uint64_t size, amdgpu_bo_handle h_bo, amdgpu_va_handle h_va,
 	amdgpu_bo_free(h_bo);
 }
 
-int sdma_copy_bo(struct kfd_criu_bo_bucket *bo_buckets, void *userptr, int i, amdgpu_device_handle h_dev,
-		 uint64_t max_copy_size, enum sdma_op_type type)
+int sdma_copy_bo(int shared_fd, uint64_t size, FILE *storage_fp,
+		 void *buffer, size_t buffer_size, amdgpu_device_handle h_dev,
+		 uint64_t max_copy_size, enum sdma_op_type type, bool do_not_free)
 {
-	uint64_t size, gpu_addr_src, gpu_addr_dest, gpu_addr_ib;
-	uint64_t gpu_addr_src_orig, gpu_addr_dest_orig;
-	amdgpu_va_handle h_va_src, h_va_dest, h_va_ib;
-	amdgpu_bo_handle h_bo_src, h_bo_dest, h_bo_ib;
+	uint64_t src_bo_size, dst_bo_size, buffer_bo_size, bytes_remain, buffer_space_remain;
+	uint64_t gpu_addr_src, gpu_addr_dst, gpu_addr_ib, copy_src, copy_dst, copy_size;
+	amdgpu_va_handle h_va_src, h_va_dst, h_va_ib;
+	amdgpu_bo_handle h_bo_src, h_bo_dst, h_bo_ib;
 	struct amdgpu_bo_import_result res = { 0 };
-	uint64_t copy_size, bytes_remain, j = 0;
-	uint64_t n_packets;
 	struct amdgpu_cs_ib_info ib_info;
 	amdgpu_bo_list_handle h_bo_list;
 	struct amdgpu_cs_request cs_req;
@@ -625,102 +556,98 @@ int sdma_copy_bo(struct kfd_criu_bo_bucket *bo_buckets, void *userptr, int i, am
 	uint32_t expired;
 	amdgpu_context_handle h_ctx;
 	uint32_t *ib = NULL;
-	int err, shared_fd;
+	int j, err, packets_per_buffer;
 
-	shared_fd = bo_buckets[i].dmabuf_fd;
-	size = bo_buckets[i].size;
+	buffer_bo_size = min(size, buffer_size);
+	packets_per_buffer = ((buffer_bo_size - 1) / max_copy_size) + 1;
+	src_bo_size = (type == SDMA_OP_VRAM_WRITE) ? buffer_bo_size : size;
+	dst_bo_size = (type == SDMA_OP_VRAM_READ) ? buffer_bo_size : size;
 
 	plugin_log_msg("Enter %s\n", __func__);
 
 	/* prepare src buffer */
 	switch (type) {
 	case SDMA_OP_VRAM_WRITE:
-		err = amdgpu_create_bo_from_user_mem(h_dev, userptr, size, &h_bo_src);
+		err = amdgpu_create_bo_from_user_mem(h_dev, buffer, src_bo_size, &h_bo_src);
 		if (err) {
 			pr_perror("failed to create userptr for sdma");
 			return -EFAULT;
 		}
-
 		break;
-
 	case SDMA_OP_VRAM_READ:
 		err = amdgpu_bo_import(h_dev, amdgpu_bo_handle_type_dma_buf_fd, shared_fd, &res);
 		if (err) {
 			pr_perror("failed to import dmabuf handle from libdrm");
 			return -EFAULT;
 		}
-
 		h_bo_src = res.buf_handle;
 		break;
-
 	default:
 		pr_perror("Invalid sdma operation");
 		return -EINVAL;
 	}
 
-	err = amdgpu_va_range_alloc(h_dev, amdgpu_gpu_va_range_general, size, 0x1000, 0, &gpu_addr_src, &h_va_src, 0);
+	err = amdgpu_va_range_alloc(h_dev, amdgpu_gpu_va_range_general, src_bo_size, 0x1000, 0, &gpu_addr_src,
+				    &h_va_src, 0);
 	if (err) {
 		pr_perror("failed to alloc VA for src bo");
 		goto err_src_va;
 	}
-	err = amdgpu_bo_va_op(h_bo_src, 0, size, gpu_addr_src, 0, AMDGPU_VA_OP_MAP);
+	err = amdgpu_bo_va_op(h_bo_src, 0, src_bo_size, gpu_addr_src, 0, AMDGPU_VA_OP_MAP);
 	if (err) {
 		pr_perror("failed to GPU map the src BO");
 		goto err_src_bo_map;
 	}
-	plugin_log_msg("Source BO: GPU VA: %lx, size: %lx\n", gpu_addr_src, size);
+	plugin_log_msg("Source BO: GPU VA: %lx, size: %lx\n", gpu_addr_src, src_bo_size);
+
 	/* prepare dest buffer */
 	switch (type) {
 	case SDMA_OP_VRAM_WRITE:
 		err = amdgpu_bo_import(h_dev, amdgpu_bo_handle_type_dma_buf_fd, shared_fd, &res);
 		if (err) {
 			pr_perror("failed to import dmabuf handle from libdrm");
-			goto err_dest_bo_prep;
+			goto err_dst_bo_prep;
 		}
-
-		h_bo_dest = res.buf_handle;
+		h_bo_dst = res.buf_handle;
 		break;
-
 	case SDMA_OP_VRAM_READ:
-		err = amdgpu_create_bo_from_user_mem(h_dev, userptr, size, &h_bo_dest);
+		err = amdgpu_create_bo_from_user_mem(h_dev, buffer, dst_bo_size, &h_bo_dst);
 		if (err) {
 			pr_perror("failed to create userptr for sdma");
-			goto err_dest_bo_prep;
+			goto err_dst_bo_prep;
 		}
 		break;
-
 	default:
 		pr_perror("Invalid sdma operation");
-		goto err_dest_bo_prep;
+		goto err_dst_bo_prep;
 	}
 
-	err = amdgpu_va_range_alloc(h_dev, amdgpu_gpu_va_range_general, size, 0x1000, 0, &gpu_addr_dest, &h_va_dest, 0);
+	err = amdgpu_va_range_alloc(h_dev, amdgpu_gpu_va_range_general, dst_bo_size, 0x1000, 0, &gpu_addr_dst,
+				    &h_va_dst, 0);
 	if (err) {
 		pr_perror("failed to alloc VA for dest bo");
-		goto err_dest_va;
+		goto err_dst_va;
 	}
-	err = amdgpu_bo_va_op(h_bo_dest, 0, size, gpu_addr_dest, 0, AMDGPU_VA_OP_MAP);
+	err = amdgpu_bo_va_op(h_bo_dst, 0, dst_bo_size, gpu_addr_dst, 0, AMDGPU_VA_OP_MAP);
 	if (err) {
 		pr_perror("failed to GPU map the dest BO");
-		goto err_dest_bo_map;
+		goto err_dst_bo_map;
 	}
-	plugin_log_msg("Dest BO: GPU VA: %lx, size: %lx\n", gpu_addr_dest, size);
+	plugin_log_msg("Dest BO: GPU VA: %lx, size: %lx\n", gpu_addr_dst, dst_bo_size);
 
-	n_packets = (size + max_copy_size) / max_copy_size;
 	/* prepare ring buffer/indirect buffer for command submission
 	 * each copy packet is 7 dwords so we need to alloc 28x size for ib
 	 */
-	err = alloc_and_map(h_dev, n_packets * 28, AMDGPU_GEM_DOMAIN_GTT, &h_bo_ib, &h_va_ib, &gpu_addr_ib,
+	err = alloc_and_map(h_dev, packets_per_buffer * 28, AMDGPU_GEM_DOMAIN_GTT, &h_bo_ib, &h_va_ib, &gpu_addr_ib,
 			    (void **)&ib);
 	if (err) {
 		pr_perror("failed to allocate and map ib/rb");
 		goto err_ib_gpu_alloc;
 	}
-
-	plugin_log_msg("Indirect BO: GPU VA: %lx, size: %lx\n", gpu_addr_ib, n_packets * 28);
+	plugin_log_msg("Indirect BO: GPU VA: %lx, size: %lx\n", gpu_addr_ib, packets_per_buffer * 28);
 
 	resources[0] = h_bo_src;
-	resources[1] = h_bo_dest;
+	resources[1] = h_bo_dst;
 	resources[2] = h_bo_ib;
 	err = amdgpu_bo_list_create(h_dev, 3, resources, NULL, &h_bo_list);
 	if (err) {
@@ -728,103 +655,124 @@ int sdma_copy_bo(struct kfd_criu_bo_bucket *bo_buckets, void *userptr, int i, am
 		goto err_bo_list;
 	}
 
-	memset(&cs_req, 0, sizeof(cs_req));
-	memset(&fence, 0, sizeof(fence));
-	memset(ib, 0, n_packets * 28);
-
-	plugin_log_msg("setting up sdma packets for command submission\n");
 	bytes_remain = size;
-	gpu_addr_src_orig = gpu_addr_src;
-	gpu_addr_dest_orig = gpu_addr_dest;
+	if (type == SDMA_OP_VRAM_WRITE)
+		copy_dst = gpu_addr_dst;
+	else
+		copy_src = gpu_addr_src;
+
 	while (bytes_remain > 0) {
-		copy_size = min(bytes_remain, max_copy_size);
+		memset(&cs_req, 0, sizeof(cs_req));
+		memset(&fence, 0, sizeof(fence));
+		memset(&ib_info, 0, sizeof(ib_info));
+		memset(ib, 0, packets_per_buffer * 28);
 
-		ib[j++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
-		ib[j++] = copy_size;
-		ib[j++] = 0;
-		ib[j++] = 0xffffffff & gpu_addr_src;
-		ib[j++] = (0xffffffff00000000 & gpu_addr_src) >> 32;
-		ib[j++] = 0xffffffff & gpu_addr_dest;
-		ib[j++] = (0xffffffff00000000 & gpu_addr_dest) >> 32;
+		if (type == SDMA_OP_VRAM_WRITE) {
+			err = read_fp(storage_fp, buffer, min(bytes_remain, buffer_bo_size));
+			if (err) {
+				pr_perror("failed to read from storage");
+				goto err_bo_list;
+			}
+		}
 
-		gpu_addr_src += copy_size;
-		gpu_addr_dest += copy_size;
-		bytes_remain -= copy_size;
-	}
+		buffer_space_remain = buffer_bo_size;
+		if (type == SDMA_OP_VRAM_WRITE)
+			copy_src = gpu_addr_src;
+		else
+			copy_dst = gpu_addr_dst;
+		j = 0;
 
-	gpu_addr_src = gpu_addr_src_orig;
-	gpu_addr_dest = gpu_addr_dest_orig;
-	plugin_log_msg("pad the IB to align on 8 dw boundary\n");
-	/* pad the IB to the required number of dw with SDMA_NOP */
-	while (j & 7)
-		ib[j++] = SDMA_NOP;
+		while (bytes_remain > 0 && buffer_space_remain > 0) {
+			copy_size = min(min(bytes_remain, max_copy_size), buffer_space_remain);
 
-	ib_info.ib_mc_address = gpu_addr_ib;
-	ib_info.size = j;
+			ib[j++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
+			ib[j++] = copy_size;
+			ib[j++] = 0;
+			ib[j++] = 0xffffffff & copy_src;
+			ib[j++] = (0xffffffff00000000 & copy_src) >> 32;
+			ib[j++] = 0xffffffff & copy_dst;
+			ib[j++] = (0xffffffff00000000 & copy_dst) >> 32;
 
-	cs_req.ip_type = AMDGPU_HW_IP_DMA;
-	/* possible future optimization: may use other rings, info available in
-	 * amdgpu_query_hw_ip_info()
-	 */
-	cs_req.ring = 0;
-	cs_req.number_of_ibs = 1;
-	cs_req.ibs = &ib_info;
-	cs_req.resources = h_bo_list;
-	cs_req.fence_info.handle = NULL;
+			copy_src += copy_size;
+			copy_dst += copy_size;
+			bytes_remain -= copy_size;
+			buffer_space_remain -= copy_size;
+		}
+		/* pad the IB to the required number of dw with SDMA_NOP */
+		while (j & 7)
+			ib[j++] = SDMA_NOP;
 
-	plugin_log_msg("create the context\n");
-	err = amdgpu_cs_ctx_create(h_dev, &h_ctx);
-	if (err) {
-		pr_perror("failed to create context for SDMA command submission");
-		goto err_ctx;
-	}
+		ib_info.ib_mc_address = gpu_addr_ib;
+		ib_info.size = j;
 
-	plugin_log_msg("initiate sdma command submission\n");
-	err = amdgpu_cs_submit(h_ctx, 0, &cs_req, 1);
-	if (err) {
-		pr_perror("failed to submit command for SDMA IB");
-		goto err_cs_submit_ib;
-	}
+		cs_req.ip_type = AMDGPU_HW_IP_DMA;
+		/* possible future optimization: may use other rings, info available in
+		 * amdgpu_query_hw_ip_info()
+		 */
+		cs_req.ring = 0;
+		cs_req.number_of_ibs = 1;
+		cs_req.ibs = &ib_info;
+		cs_req.resources = h_bo_list;
+		cs_req.fence_info.handle = NULL;
 
-	fence.context = h_ctx;
-	fence.ip_type = AMDGPU_HW_IP_DMA;
-	fence.ip_instance = 0;
-	fence.ring = 0;
-	fence.fence = cs_req.seq_no;
-	err = amdgpu_cs_query_fence_status(&fence, AMDGPU_TIMEOUT_INFINITE, 0, &expired);
-	if (err) {
-		pr_perror("failed to query fence status");
-		goto err_cs_submit_ib;
-	}
+		err = amdgpu_cs_ctx_create(h_dev, &h_ctx);
+		if (err) {
+			pr_perror("failed to create context for SDMA command submission");
+			goto err_ctx;
+		}
+		err = amdgpu_cs_submit(h_ctx, 0, &cs_req, 1);
+		if (err) {
+			pr_perror("failed to submit command for SDMA IB");
+			goto err_cs_submit_ib;
+		}
 
-	if (!expired) {
-		pr_err("IB execution did not complete\n");
-		err = -EBUSY;
-		goto err_cs_submit_ib;
-	}
+		fence.context = h_ctx;
+		fence.ip_type = AMDGPU_HW_IP_DMA;
+		fence.ip_instance = 0;
+		fence.ring = 0;
+		fence.fence = cs_req.seq_no;
+		err = amdgpu_cs_query_fence_status(&fence, AMDGPU_TIMEOUT_INFINITE, 0, &expired);
+		if (err) {
+			pr_perror("failed to query fence status");
+			goto err_cs_submit_ib;
+		}
+		if (!expired) {
+			pr_err("IB execution did not complete\n");
+			err = -EBUSY;
+			goto err_cs_submit_ib;
+		}
 
-	plugin_log_msg("done querying fence status\n");
+		if (type == SDMA_OP_VRAM_READ) {
+			err = write_fp(storage_fp, buffer, buffer_bo_size - buffer_space_remain);
+			if (err) {
+				pr_perror("failed to write out to storage");
+				goto err_cs_submit_ib;
+			}
+		}
 
 err_cs_submit_ib:
-	amdgpu_cs_ctx_free(h_ctx);
+		amdgpu_cs_ctx_free(h_ctx);
+		if (err)
+			break;
+	}
 err_ctx:
 	amdgpu_bo_list_destroy(h_bo_list);
 err_bo_list:
-	free_and_unmap(n_packets * 28, h_bo_ib, h_va_ib, gpu_addr_ib, ib);
+	free_and_unmap(packets_per_buffer * 28, h_bo_ib, h_va_ib, gpu_addr_ib, ib);
 err_ib_gpu_alloc:
-	err = amdgpu_bo_va_op(h_bo_dest, 0, size, gpu_addr_dest, 0, AMDGPU_VA_OP_UNMAP);
+	err = amdgpu_bo_va_op(h_bo_dst, 0, size, gpu_addr_dst, 0, AMDGPU_VA_OP_UNMAP);
 	if (err)
-		pr_perror("failed to GPU unmap the dest BO %lx, size = %lx", gpu_addr_dest, size);
-err_dest_bo_map:
-	err = amdgpu_va_range_free(h_va_dest);
+		pr_perror("failed to GPU unmap the dest BO %lx, size = %lx", gpu_addr_dst, size);
+err_dst_bo_map:
+	err = amdgpu_va_range_free(h_va_dst);
 	if (err)
 		pr_perror("dest range free failed");
-err_dest_va:
-	err = amdgpu_bo_free(h_bo_dest);
+err_dst_va:
+	if (!do_not_free)
+		err = amdgpu_bo_free(h_bo_dst);
 	if (err)
 		pr_perror("dest bo free failed");
-
-err_dest_bo_prep:
+err_dst_bo_prep:
 	err = amdgpu_bo_va_op(h_bo_src, 0, size, gpu_addr_src, 0, AMDGPU_VA_OP_UNMAP);
 	if (err)
 		pr_perror("failed to GPU unmap the src BO %lx, size = %lx", gpu_addr_src, size);
@@ -836,7 +784,6 @@ err_src_va:
 	err = amdgpu_bo_free(h_bo_src);
 	if (err)
 		pr_perror("src bo free failed");
-
 	plugin_log_msg("Leaving sdma_copy_bo, err = %d\n", err);
 	return err;
 }
@@ -845,19 +792,18 @@ void *dump_bo_contents(void *_thread_data)
 {
 	struct thread_data *thread_data = (struct thread_data *)_thread_data;
 	struct kfd_criu_bo_bucket *bo_buckets = thread_data->bo_buckets;
-	BoEntry **bo_info = thread_data->bo_entries;
 	struct amdgpu_gpu_info gpu_info = { 0 };
 	amdgpu_device_handle h_dev;
-	size_t max_bo_size = 0, image_size = 0;
+	size_t max_bo_size = 0, image_size = 0, buffer_size;
 	uint64_t max_copy_size;
 	uint32_t major, minor;
 	int num_bos = 0;
 	int i, ret = 0;
 	FILE *bo_contents_fp = NULL;
-	void *buffer;
+	void *buffer = NULL;
 	char img_path[40];
 
-	pr_info("amdgpu_plugin: Thread[0x%x] started\n", thread_data->gpu_id);
+	pr_info("Thread[0x%x] started\n", thread_data->gpu_id);
 
 	ret = amdgpu_device_initialize(thread_data->drm_fd, &major, &minor, &h_dev);
 	if (ret) {
@@ -884,15 +830,16 @@ void *dump_bo_contents(void *_thread_data)
 		}
 	}
 
-	/* Allocate buffer to fit biggest BO */
-	posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), max_bo_size);
+	buffer_size = kfd_max_buffer_size > 0 ? min(kfd_max_buffer_size, max_bo_size) : max_bo_size;
+
+	posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), buffer_size);
 	if (!buffer) {
-		pr_perror("Failed to alloc aligned memory");
+		pr_perror("Failed to alloc aligned memory. Consider setting KFD_MAX_BUFFER_SIZE.");
 		ret = -ENOMEM;
 		goto exit;
 	}
 
-	snprintf(img_path, sizeof(img_path), IMG_PAGES_FILE, thread_data->id, thread_data->gpu_id);
+	snprintf(img_path, sizeof(img_path), IMG_KFD_PAGES_FILE, thread_data->id, thread_data->gpu_id);
 	bo_contents_fp = open_img_file(img_path, true, &image_size);
 	if (!bo_contents_fp) {
 		pr_perror("Cannot fopen %s", img_path);
@@ -910,19 +857,17 @@ void *dump_bo_contents(void *_thread_data)
 		num_bos++;
 
 		/* perform sDMA based vram copy */
-		ret = sdma_copy_bo(bo_buckets, buffer, i, h_dev, max_copy_size, SDMA_OP_VRAM_READ);
+		ret = sdma_copy_bo(bo_buckets[i].dmabuf_fd, bo_buckets[i].size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size,
+				   SDMA_OP_VRAM_READ, false);
+
 		if (ret) {
 			pr_err("Failed to drain the BO using sDMA: bo_buckets[%d]\n", i);
 			break;
 		}
-		plugin_log_msg("** Successfully drained the BO using sDMA: bo_buckets[%d] **\n", i);
-		ret = write_fp(bo_contents_fp, buffer, bo_info[i]->size);
-		if (ret)
-			break;
 	}
 
 exit:
-	pr_info("amdgpu_plugin: Thread[0x%x] done num_bos:%d ret:%d\n", thread_data->gpu_id, num_bos, ret);
+	pr_info("Thread[0x%x] done num_bos:%d ret:%d\n", thread_data->gpu_id, num_bos, ret);
 
 	if (bo_contents_fp)
 		fclose(bo_contents_fp);
@@ -939,19 +884,18 @@ void *restore_bo_contents(void *_thread_data)
 {
 	struct thread_data *thread_data = (struct thread_data *)_thread_data;
 	struct kfd_criu_bo_bucket *bo_buckets = thread_data->bo_buckets;
-	size_t image_size = 0, total_bo_size = 0, max_bo_size = 0;
-	BoEntry **bo_info = thread_data->bo_entries;
+	size_t image_size = 0, total_bo_size = 0, max_bo_size = 0, buffer_size;
 	struct amdgpu_gpu_info gpu_info = { 0 };
 	amdgpu_device_handle h_dev;
 	uint64_t max_copy_size;
 	uint32_t major, minor;
 	FILE *bo_contents_fp = NULL;
-	void *buffer;
+	void *buffer = NULL;
 	char img_path[40];
 	int num_bos = 0;
 	int i, ret = 0;
 
-	pr_info("amdgpu_plugin: Thread[0x%x] started\n", thread_data->gpu_id);
+	pr_info("Thread[0x%x] started\n", thread_data->gpu_id);
 
 	ret = amdgpu_device_initialize(thread_data->drm_fd, &major, &minor, &h_dev);
 	if (ret) {
@@ -969,7 +913,7 @@ void *restore_bo_contents(void *_thread_data)
 	max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ? SDMA_LINEAR_COPY_MAX_SIZE :
 								   SDMA_LINEAR_COPY_MAX_SIZE - 1;
 
-	snprintf(img_path, sizeof(img_path), IMG_PAGES_FILE, thread_data->id, thread_data->gpu_id);
+	snprintf(img_path, sizeof(img_path), IMG_KFD_PAGES_FILE, thread_data->id, thread_data->gpu_id);
 	bo_contents_fp = open_img_file(img_path, false, &image_size);
 	if (!bo_contents_fp) {
 		pr_perror("Cannot fopen %s", img_path);
@@ -977,7 +921,6 @@ void *restore_bo_contents(void *_thread_data)
 		goto exit;
 	}
 
-	/* Allocate buffer to fit biggest BO */
 	for (i = 0; i < thread_data->num_of_bos; i++) {
 		if (bo_buckets[i].gpu_id == thread_data->gpu_id &&
 		    (bo_buckets[i].alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))) {
@@ -989,17 +932,17 @@ void *restore_bo_contents(void *_thread_data)
 	}
 
 	if (total_bo_size != image_size) {
-		pr_err("amdgpu_plugin: %s size mismatch (current:%ld:expected:%ld)\n", img_path, image_size,
-		       total_bo_size);
+		pr_err("%s size mismatch (current:%ld:expected:%ld)\n", img_path, image_size, total_bo_size);
 
 		ret = -EINVAL;
 		goto exit;
 	}
 
-	/* Allocate buffer to fit biggest BO */
-	posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), max_bo_size);
+	buffer_size = kfd_max_buffer_size > 0 ? min(kfd_max_buffer_size, max_bo_size) : max_bo_size;
+
+	posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), buffer_size);
 	if (!buffer) {
-		pr_perror("Failed to alloc aligned memory");
+		pr_perror("Failed to alloc aligned memory. Consider setting KFD_MAX_BUFFER_SIZE.");
 		ret = -ENOMEM;
 		goto exit;
 	}
@@ -1013,11 +956,8 @@ void *restore_bo_contents(void *_thread_data)
 
 		num_bos++;
 
-		ret = read_fp(bo_contents_fp, buffer, bo_info[i]->size);
-		if (ret)
-			goto exit;
-
-		ret = sdma_copy_bo(bo_buckets, buffer, i, h_dev, max_copy_size, SDMA_OP_VRAM_WRITE);
+		ret = sdma_copy_bo(bo_buckets[i].dmabuf_fd, bo_buckets[i].size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size,
+				   SDMA_OP_VRAM_WRITE, false);
 		if (ret) {
 			pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i);
 			break;
@@ -1026,7 +966,7 @@ void *restore_bo_contents(void *_thread_data)
 	}
 
 exit:
-	pr_info("amdgpu_plugin: Thread[0x%x] done num_bos:%d ret:%d\n", thread_data->gpu_id, num_bos, ret);
+	pr_info("Thread[0x%x] done num_bos:%d ret:%d\n", thread_data->gpu_id, num_bos, ret);
 
 	if (bo_contents_fp)
 		fclose(bo_contents_fp);
@@ -1054,9 +994,9 @@ int check_hsakmt_shared_mem(uint64_t *shared_mem_size, uint32_t *shared_mem_magi
 	/* First 4 bytes of shared file is the magic */
 	ret = read_file(HSAKMT_SHM_PATH, shared_mem_magic, sizeof(*shared_mem_magic));
 	if (ret)
-		pr_perror("amdgpu_plugin: Failed to read shared mem magic");
+		pr_perror("Failed to read shared mem magic");
 	else
-		plugin_log_msg("amdgpu_plugin: Shared mem magic:0x%x\n", *shared_mem_magic);
+		plugin_log_msg("Shared mem magic:0x%x\n", *shared_mem_magic);
 
 	return 0;
 }
@@ -1071,7 +1011,7 @@ int restore_hsakmt_shared_mem(const uint64_t shared_mem_size, const uint32_t sha
 		return 0;
 
 	if (!stat(HSAKMT_SHM_PATH, &st)) {
-		pr_debug("amdgpu_plugin: %s already exists\n", HSAKMT_SHM_PATH);
+		pr_debug("%s already exists\n", HSAKMT_SHM_PATH);
 	} else {
 		pr_info("Warning:%s was missing. Re-creating new file but we may lose perf counters\n",
 			HSAKMT_SHM_PATH);
@@ -1079,14 +1019,14 @@ int restore_hsakmt_shared_mem(const uint64_t shared_mem_size, const uint32_t sha
 
 		ret = ftruncate(fd, shared_mem_size);
 		if (ret < 0) {
-			pr_err("amdgpu_plugin: Failed to truncate shared mem %s\n", HSAKMT_SHM);
+			pr_err("Failed to truncate shared mem %s\n", HSAKMT_SHM);
 			close(fd);
 			return -errno;
 		}
 
 		ret = write(fd, &shared_mem_magic, sizeof(shared_mem_magic));
 		if (ret != sizeof(shared_mem_magic)) {
-			pr_perror("amdgpu_plugin: Failed to restore shared mem magic");
+			pr_perror("Failed to restore shared mem magic");
 			close(fd);
 			return -errno;
 		}
@@ -1103,24 +1043,163 @@ int restore_hsakmt_shared_mem(const uint64_t shared_mem_size, const uint32_t sha
 	return 0;
 }
 
-static int unpause_process(int fd)
+int amdgpu_unpause_processes(int pid)
 {
 	int ret = 0;
 	struct kfd_ioctl_criu_args args = { 0 };
+	struct list_head *l = get_dumped_fds();
+	struct dumped_fd *st;
 
-	args.op = KFD_CRIU_OP_UNPAUSE;
+	list_for_each_entry(st, l, l) {
+		if (st->is_drm) {
+			close(st->fd);
+		} else {
+			args.op = KFD_CRIU_OP_UNPAUSE;
 
-	ret = kmtIoctl(fd, AMDKFD_IOC_CRIU_OP, &args);
-	if (ret) {
-		pr_perror("amdgpu_plugin: Failed to unpause process");
-		goto exit;
+			ret = kmtIoctl(st->fd, AMDKFD_IOC_CRIU_OP, &args);
+			if (ret) {
+				pr_perror("Failed to unpause process");
+				goto exit;
+			}
+		}
 	}
 
+	if (post_dump_dmabuf_check() < 0)
+		ret = -1;
+
 exit:
 	pr_info("Process unpaused %s (ret:%d)\n", ret ? "Failed" : "Ok", ret);
+	clear_dumped_fds();
 
 	return ret;
 }
+CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__DUMP_DEVICES_LATE, amdgpu_unpause_processes)
+
+/* Record @handle with the fdstore id of @fd in the first free slot; 0 if stored or already known, -1 on failure */
+int store_dmabuf_fd(int handle, int fd)
+{
+	int id, ret = -1;
+
+	mutex_lock(shared_memory_mutex);
+	for (int i = 0; ret && i < shared_memory->num_handles; i++) {
+		if (shared_memory->handles[i].handle == handle) {
+			ret = 0; /* already recorded: do not consume another fdstore entry */
+		} else if (shared_memory->handles[i].handle == -1) {
+			id = fdstore_add(fd); /* assumed to be negative on failure - TODO confirm */
+			if (id < 0)
+				break;
+			shared_memory->handles[i].handle = handle;
+			shared_memory->handles[i].fdstore_id = id;
+			ret = 0;
+		}
+	}
+	mutex_unlock(shared_memory_mutex);
+
+	return ret;
+}
+
+/* Return the fdstore id recorded for @handle, or -1 when the handle is not in the shared table */
+int amdgpu_id_for_handle(int handle)
+{
+	int id = -1;
+
+	mutex_lock(shared_memory_mutex);
+	for (int i = 0; id == -1 && i < shared_memory->num_handles; i++)
+		if (shared_memory->handles[i].handle == handle)
+			id = shared_memory->handles[i].fdstore_id; /* read while the lock is still held */
+	mutex_unlock(shared_memory_mutex);
+	return id;
+}
+
+/* Add num_of_bos of image @name (a CriuKfd if @is_kfd, else a CriuRenderNode) to *num_bos; returns 0 or <0 on error */
+static int amdgpu_count_image_bos(char *name, bool is_kfd, int *num_bos)
+{
+	CriuRenderNode *rd = NULL;
+	CriuKfd *e = NULL;
+	unsigned char *buf;
+	size_t img_size;
+	FILE *img_fp = open_img_file(name, false, &img_size);
+	int ret;
+
+	if (!img_fp) /* NOTE(review): open_img_file() presumably logs the reason itself - confirm */
+		return -1;
+	buf = xmalloc(img_size);
+	if (!buf) {
+		fclose(img_fp);
+		return -ENOMEM;
+	}
+	ret = read_fp(img_fp, buf, img_size);
+	if (ret)
+		pr_perror("Unable to read from %s", name);
+	else if (is_kfd)
+		e = criu_kfd__unpack(NULL, img_size, buf);
+	else
+		rd = criu_render_node__unpack(NULL, img_size, buf);
+
+	if (e) {
+		*num_bos += e->num_of_bos;
+		criu_kfd__free_unpacked(e, NULL);
+	} else if (rd) {
+		*num_bos += rd->num_of_bos;
+		criu_render_node__free_unpacked(rd, NULL);
+	} else if (!ret) {
+		pr_err("Unable to unpack %s\n", name);
+		ret = -1;
+	}
+	fclose(img_fp);
+	xfree(buf);
+	return ret;
+}
+
+/* RESTORE_INIT hook: size and create the shared handle table (every slot free, i.e. -1) and its mutex; 0 or <0 */
+int amdgpu_restore_init(void)
+{
+	int prot = PROT_READ | PROT_WRITE, flags = MAP_SHARED | MAP_ANONYMOUS;
+	int num_handles = 0, ret = 0;
+	struct dirent *dir;
+	DIR *d;
+
+	if (shared_memory)
+		return 0;
+
+	d = opendir("."); /* NOTE(review): presumably the cwd is the image directory here - confirm */
+	while (d && !ret && (dir = readdir(d)) != NULL) {
+		if (!strncmp("amdgpu-kfd-", dir->d_name, strlen("amdgpu-kfd-")))
+			ret = amdgpu_count_image_bos(dir->d_name, true, &num_handles);
+		else if (!strncmp("amdgpu-renderD-", dir->d_name, strlen("amdgpu-renderD-")))
+			ret = amdgpu_count_image_bos(dir->d_name, false, &num_handles);
+	}
+	if (d)
+		closedir(d);
+	if (ret || num_handles <= 0)
+		return ret;
+
+	shared_memory_mutex = shmalloc(sizeof(*shared_memory_mutex));
+	if (!shared_memory_mutex) {
+		pr_err("Can't create amdgpu mutex\n");
+		return -1;
+	}
+	mutex_init(shared_memory_mutex); /* set up first so a non-NULL shared_memory always has its mutex */
+	shared_memory = mmap(NULL, sizeof(*shared_memory), prot, flags, -1, 0);
+	if (shared_memory == MAP_FAILED)
+		goto err_map;
+	shared_memory->handles = mmap(NULL, sizeof(struct handle_id) * num_handles, prot, flags, -1, 0);
+	if (shared_memory->handles == MAP_FAILED) {
+		munmap(shared_memory, sizeof(*shared_memory));
+		goto err_map;
+	}
+	shared_memory->num_handles = num_handles;
+	for (int i = 0; i < num_handles; i++) {
+		shared_memory->handles[i].handle = -1;
+		shared_memory->handles[i].fdstore_id = -1;
+	}
+	return 0;
+err_map:
+	pr_perror("Can't map amdgpu handle table");
+	shared_memory = NULL; /* do not leave MAP_FAILED behind for the !shared_memory checks */
+	return -1;
+}
+CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESTORE_INIT, amdgpu_restore_init)
 
 static int save_devices(int fd, struct kfd_ioctl_criu_args *args, struct kfd_criu_device_bucket *device_buckets,
 			CriuKfd *e)
@@ -1164,6 +1243,8 @@ static int save_bos(int id, int fd, struct kfd_ioctl_criu_args *args, struct kfd
 {
 	struct thread_data *thread_datas;
 	int ret = 0, i;
+	amdgpu_device_handle h_dev;
+	uint32_t major, minor;
 
 	pr_debug("Dumping %d BOs\n", args->num_bos);
 
@@ -1180,13 +1261,26 @@ static int save_bos(int id, int fd, struct kfd_ioctl_criu_args *args, struct kfd
 
 	for (i = 0; i < e->num_of_bos; i++) {
 		struct kfd_criu_bo_bucket *bo_bucket = &bo_buckets[i];
-		BoEntry *boinfo = e->bo_entries[i];
+		KfdBoEntry *boinfo = e->bo_entries[i];
 
 		boinfo->gpu_id = bo_bucket->gpu_id;
 		boinfo->addr = bo_bucket->addr;
 		boinfo->size = bo_bucket->size;
 		boinfo->offset = bo_bucket->offset;
 		boinfo->alloc_flags = bo_bucket->alloc_flags;
+
+		ret = amdgpu_device_initialize(node_get_drm_render_device(sys_get_node_by_gpu_id(&src_topology, bo_bucket->gpu_id)), &major, &minor, &h_dev);
+
+		boinfo->handle = get_gem_handle(h_dev, bo_bucket->dmabuf_fd);
+
+		amdgpu_device_deinitialize(h_dev);
+	}
+	for (i = 0; i < e->num_of_bos; i++) {
+		KfdBoEntry *boinfo = e->bo_entries[i];
+
+		ret = record_shared_bo(boinfo->handle, false);
+		if (ret)
+			goto exit;
 	}
 
 	for (int i = 0; i < e->num_of_gpus; i++) {
@@ -1254,7 +1348,7 @@ bool kernel_supports_criu(int fd)
 	}
 
 	if (kmtIoctl(fd, AMDKFD_IOC_GET_VERSION, &args) == -1) {
-		pr_perror("amdgpu_plugin: Failed to call get version ioctl");
+		pr_perror("Failed to call get version ioctl");
 		ret = false;
 		goto exit;
 	}
@@ -1262,8 +1356,8 @@ bool kernel_supports_criu(int fd)
 	pr_debug("Kernel IOCTL version:%d.%02d\n", args.major_version, args.minor_version);
 
 	if (args.major_version != KFD_IOCTL_MAJOR_VERSION || args.minor_version < MIN_KFD_IOCTL_MINOR_VERSION) {
-		pr_err("amdgpu_plugin: CR not supported on current kernel (current:%02d.%02d min:%02d.%02d)\n",
-		       args.major_version, args.minor_version, KFD_IOCTL_MAJOR_VERSION, MIN_KFD_IOCTL_MINOR_VERSION);
+		pr_err("CR not supported on current kernel (current:%02d.%02d min:%02d.%02d)\n", args.major_version,
+		       args.minor_version, KFD_IOCTL_MAJOR_VERSION, MIN_KFD_IOCTL_MINOR_VERSION);
 		ret = false;
 		goto exit;
 	}
@@ -1286,13 +1380,13 @@ int amdgpu_plugin_dump_file(int fd, int id)
 	size_t len;
 
 	if (fstat(fd, &st) == -1) {
-		pr_perror("amdgpu_plugin: fstat error");
+		pr_perror("fstat error");
 		return -1;
 	}
 
 	ret = stat(AMDGPU_KFD_DEVICE, &st_kfd);
 	if (ret == -1) {
-		pr_perror("amdgpu_plugin: fstat error for /dev/kfd");
+		pr_perror("fstat error for /dev/kfd");
 		return -1;
 	}
 
@@ -1307,50 +1401,36 @@ int amdgpu_plugin_dump_file(int fd, int id)
 		return -1;
 	}
 
-	/* Check whether this plugin was called for kfd or render nodes */
+	/* Check whether this plugin was called for kfd, dmabuf or render nodes */
+	ret = get_dmabuf_info(fd, &st);
+	if (ret < 0) {
+		pr_perror("Failed to get dmabuf info");
+		return -1;
+	}
+	if (ret == 0) {
+		pr_info("Dumping dmabuf fd = %d\n", fd);
+		return amdgpu_plugin_dmabuf_dump(fd, id);
+	}
+
 	if (major(st.st_rdev) != major(st_kfd.st_rdev) || minor(st.st_rdev) != 0) {
+
 		/* This is RenderD dumper plugin, for now just save renderD
 		 * minor number to be used during restore. In later phases this
 		 * needs to save more data for video decode etc.
 		 */
-
-		CriuRenderNode rd = CRIU_RENDER_NODE__INIT;
-		struct tp_node *tp_node;
-
-		pr_info("amdgpu_plugin: Dumper called for /dev/dri/renderD%d, FD = %d, ID = %d\n", minor(st.st_rdev),
-			fd, id);
-
-		tp_node = sys_get_node_by_render_minor(&src_topology, minor(st.st_rdev));
-		if (!tp_node) {
-			pr_err("amdgpu_plugin: Failed to find a device with minor number = %d\n", minor(st.st_rdev));
-
-			return -ENODEV;
-		}
-
-		rd.gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id);
-		if (!rd.gpu_id)
-			return -ENODEV;
-
-		len = criu_render_node__get_packed_size(&rd);
-		buf = xmalloc(len);
-		if (!buf)
-			return -ENOMEM;
-
-		criu_render_node__pack(&rd, buf);
-
-		snprintf(img_path, sizeof(img_path), IMG_RENDERD_FILE, id);
-		ret = write_img_file(img_path, buf, len);
-		if (ret) {
-			xfree(buf);
+		ret = amdgpu_plugin_drm_dump_file(fd, id, &st);
+		if (ret)
+			return ret;
+
+		ret = record_dumped_fd(fd, true);
+		if (ret)
 			return ret;
-		}
 
-		xfree(buf);
 		/* Need to return success here so that criu can call plugins for renderD nodes */
-		return ret;
+		return try_dump_dmabuf_list();
 	}
 
-	pr_info("amdgpu_plugin: %s : %s() called for fd = %d\n", CR_PLUGIN_DESC.name, __func__, major(st.st_rdev));
+	pr_info("%s() called for fd = %d\n", __func__, major(st.st_rdev));
 
 	/* KFD only allows ioctl calls from the same process that opened the KFD file descriptor.
 	 * The existing /dev/kfd file descriptor that is passed in is only allowed to do IOCTL calls with
@@ -1362,13 +1442,13 @@ int amdgpu_plugin_dump_file(int fd, int id)
 
 	args.op = KFD_CRIU_OP_PROCESS_INFO;
 	if (kmtIoctl(fd, AMDKFD_IOC_CRIU_OP, &args) == -1) {
-		pr_perror("amdgpu_plugin: Failed to call process info ioctl");
+		pr_perror("Failed to call process info ioctl");
 		ret = -1;
 		goto exit;
 	}
 
-	pr_info("amdgpu_plugin: devices:%d bos:%d objects:%d priv_data:%lld\n", args.num_devices, args.num_bos,
-		args.num_objects, args.priv_data_size);
+	pr_info("devices:%" PRIu32 " bos:%" PRIu32 " objects:%" PRIu32 " priv_data:%" PRIu64 "\n",
+		args.num_devices, args.num_bos, args.num_objects, args.priv_data_size);
 
 	e = xmalloc(sizeof(*e));
 	if (!e) {
@@ -1401,7 +1481,7 @@ int amdgpu_plugin_dump_file(int fd, int id)
 	args.op = KFD_CRIU_OP_CHECKPOINT;
 	ret = kmtIoctl(fd, AMDKFD_IOC_CRIU_OP, &args);
 	if (ret) {
-		pr_perror("amdgpu_plugin: Failed to call dumper (process) ioctl");
+		pr_perror("Failed to call dumper (process) ioctl");
 		goto exit;
 	}
 
@@ -1423,11 +1503,11 @@ int amdgpu_plugin_dump_file(int fd, int id)
 		goto exit;
 
 	snprintf(img_path, sizeof(img_path), IMG_KFD_FILE, id);
-	pr_info("amdgpu_plugin: img_path = %s\n", img_path);
+	pr_info("img_path = %s\n", img_path);
 
 	len = criu_kfd__get_packed_size(e);
 
-	pr_info("amdgpu_plugin: Len = %ld\n", len);
+	pr_info("Len = %ld\n", len);
 
 	buf = xmalloc(len);
 	if (!buf) {
@@ -1441,11 +1521,12 @@ int amdgpu_plugin_dump_file(int fd, int id)
 	ret = write_img_file(img_path, buf, len);
 
 	xfree(buf);
-exit:
-	/* Restore all queues */
-	unpause_process(fd);
 
-	sys_close_drm_render_devices(&src_topology);
+	ret = record_dumped_fd(fd, false);
+	if (ret)
+		goto exit;
+
+exit:
 	xfree((void *)args.devices);
 	xfree((void *)args.bos);
 	xfree((void *)args.priv_data);
@@ -1453,9 +1534,9 @@ exit:
 	free_e(e);
 
 	if (ret)
-		pr_err("amdgpu_plugin: Failed to dump (ret:%d)\n", ret);
+		pr_err("Failed to dump (ret:%d)\n", ret);
 	else
-		pr_info("amdgpu_plugin: Dump successful\n");
+		pr_info("Dump successful\n");
 
 	return ret;
 }
@@ -1468,7 +1549,6 @@ static int restore_devices(struct kfd_ioctl_criu_args *args, CriuKfd *e)
 	int ret = 0, bucket_index = 0;
 
 	pr_debug("Restoring %d devices\n", e->num_of_gpus);
-
 	args->num_devices = e->num_of_gpus;
 	device_buckets = xzalloc(sizeof(*device_buckets) * args->num_devices);
 	if (!device_buckets)
@@ -1478,7 +1558,7 @@ static int restore_devices(struct kfd_ioctl_criu_args *args, CriuKfd *e)
 
 	for (int entries_i = 0; entries_i < e->num_of_cpus + e->num_of_gpus; entries_i++) {
 		struct kfd_criu_device_bucket *device_bucket;
-		DeviceEntry *devinfo = e->device_entries[entries_i];
+		KfdDeviceEntry *devinfo = e->device_entries[entries_i];
 		struct tp_node *tp_node;
 
 		if (!devinfo->gpu_id)
@@ -1501,10 +1581,10 @@ static int restore_devices(struct kfd_ioctl_criu_args *args, CriuKfd *e)
 
 		device_bucket->drm_fd = node_get_drm_render_device(tp_node);
 		if (device_bucket->drm_fd < 0) {
-			pr_perror("amdgpu_plugin: Can't pass NULL drm render fd to driver");
+			pr_perror("Can't pass NULL drm render fd to driver");
 			goto exit;
 		} else {
-			pr_info("amdgpu_plugin: passing drm render fd = %d to driver\n", device_bucket->drm_fd);
+			pr_info("passing drm render fd = %d to driver\n", device_bucket->drm_fd);
 		}
 	}
 
@@ -1528,7 +1608,7 @@ static int restore_bos(struct kfd_ioctl_criu_args *args, CriuKfd *e)
 
 	for (int i = 0; i < args->num_bos; i++) {
 		struct kfd_criu_bo_bucket *bo_bucket = &bo_buckets[i];
-		BoEntry *bo_entry = e->bo_entries[i];
+		KfdBoEntry *bo_entry = e->bo_entries[i];
 
 		bo_bucket->gpu_id = bo_entry->gpu_id;
 		bo_bucket->addr = bo_entry->addr;
@@ -1541,19 +1621,37 @@ static int restore_bos(struct kfd_ioctl_criu_args *args, CriuKfd *e)
 	}
 
 	pr_info("Restore BOs Ok\n");
+
+	return 0;
+}
+
+/*
+ * Append a vma_metadata record to update_vma_info_list. The record stores the
+ * mapping address @addr, its old page offset @offset, the new page offset
+ * @restored_offset and @fd. Returns 0, or -ENOMEM if allocation fails.
+ */
+int save_vma_updates(uint64_t offset, uint64_t addr, uint64_t restored_offset, int fd)
+{
+	struct vma_metadata *md = xzalloc(sizeof(*md));
+
+	if (md == NULL)
+		return -ENOMEM;
+
+	md->vma_entry = addr;
+	md->old_pgoff = offset;
+	md->new_pgoff = restored_offset;
+	md->fd = fd;
+
+	list_add_tail(&md->list, &update_vma_info_list);
+
 	return 0;
 }
 
 static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKfd *e)
 {
-	struct thread_data *thread_datas;
+	struct thread_data *thread_datas = NULL;
 	int thread_i, ret = 0;
-
-	thread_datas = xzalloc(sizeof(*thread_datas) * e->num_of_gpus);
-	if (!thread_datas) {
-		ret = -ENOMEM;
-		goto exit;
-	}
+	uint64_t offset = 0;
 
 	for (int i = 0; i < e->num_of_bos; i++) {
 		struct kfd_criu_bo_bucket *bo_bucket = &bo_buckets[i];
@@ -1588,7 +1686,7 @@ static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKf
 			vma_md->new_pgoff = bo_bucket->restored_offset;
 			vma_md->fd = node_get_drm_render_device(tp_node);
 
-			plugin_log_msg("amdgpu_plugin: adding vma_entry:addr:0x%lx old-off:0x%lx "
+			plugin_log_msg("adding vma_entry:addr:0x%lx old-off:0x%lx "
 				       "new_off:0x%lx new_minor:%d\n",
 				       vma_md->vma_entry, vma_md->old_pgoff, vma_md->new_pgoff, vma_md->new_minor);
 
@@ -1596,56 +1694,101 @@ static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKf
 		}
 	}
 
-	thread_i = 0;
-	for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) {
-		struct tp_node *dev;
-		int ret_thread = 0;
-		uint32_t target_gpu_id;
+	if (!parallel_disabled) {
+		parallel_restore_cmd restore_cmd;
+		pr_info("Begin to send parallel restore cmd\n");
+		ret = init_parallel_restore_cmd(e->num_of_bos, id, e->num_of_gpus, &restore_cmd);
+		if (ret)
+			goto exit_parallel;
 
-		if (!e->device_entries[i]->gpu_id)
-			continue;
+		for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) {
+			uint32_t target_gpu_id;
+			struct tp_node *dev;
 
-		/* e->device_entries[i]->gpu_id is user_gpu_id, target_gpu_id is actual_gpu_id */
-		target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id);
+			if (!e->device_entries[i]->gpu_id)
+				continue;
 
-		/* We need the fd for actual_gpu_id */
-		dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id);
-		if (!dev) {
-			pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id);
-			ret = -ENODEV;
+			target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id);
+			dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id);
+			if (!dev) {
+				pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id);
+				ret = -ENODEV;
+				goto exit_parallel;
+			}
+			parallel_restore_gpu_id_add(e->device_entries[i]->gpu_id, dev->drm_render_minor, &restore_cmd);
+
+			for (int j = 0; j < e->num_of_bos; j++) {
+				if (bo_buckets[j].gpu_id != e->device_entries[i]->gpu_id)
+					continue;
+				if (bo_buckets[j].alloc_flags &
+				    (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
+					parallel_restore_bo_add(bo_buckets[j].dmabuf_fd, bo_buckets[j].gpu_id,
+								bo_buckets[j].size, offset, &restore_cmd);
+					offset += bo_buckets[j].size;
+				}
+			}
+		}
+		ret = send_parallel_restore_cmd(&restore_cmd);
+exit_parallel:
+		free_parallel_restore_cmd(&restore_cmd);
+	} else {
+		thread_datas = xzalloc(sizeof(*thread_datas) * e->num_of_gpus);
+		if (!thread_datas) {
+			ret = -ENOMEM;
 			goto exit;
 		}
 
-		thread_datas[thread_i].id = id;
-		thread_datas[thread_i].gpu_id = e->device_entries[i]->gpu_id;
-		thread_datas[thread_i].bo_buckets = bo_buckets;
-		thread_datas[thread_i].bo_entries = e->bo_entries;
-		thread_datas[thread_i].pid = e->pid;
-		thread_datas[thread_i].num_of_bos = e->num_of_bos;
+		thread_i = 0;
+		for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) {
+			struct tp_node *dev;
+			int ret_thread = 0;
+			uint32_t target_gpu_id;
 
-		thread_datas[thread_i].drm_fd = node_get_drm_render_device(dev);
-		if (thread_datas[thread_i].drm_fd < 0) {
-			ret = -thread_datas[thread_i].drm_fd;
-			goto exit;
+			if (!e->device_entries[i]->gpu_id)
+				continue;
+
+			/* e->device_entries[i]->gpu_id is user_gpu_id, target_gpu_id is actual_gpu_id */
+			target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id);
+
+			/* We need the fd for actual_gpu_id */
+			dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id);
+			if (!dev) {
+				pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id);
+				ret = -ENODEV;
+				goto exit;
+			}
+
+			thread_datas[thread_i].id = id;
+			thread_datas[thread_i].gpu_id = e->device_entries[i]->gpu_id;
+			thread_datas[thread_i].bo_buckets = bo_buckets;
+			thread_datas[thread_i].bo_entries = e->bo_entries;
+			thread_datas[thread_i].pid = e->pid;
+			thread_datas[thread_i].num_of_bos = e->num_of_bos;
+
+			thread_datas[thread_i].drm_fd = node_get_drm_render_device(dev);
+			if (thread_datas[thread_i].drm_fd < 0) {
+				ret = -thread_datas[thread_i].drm_fd;
+				goto exit;
+			}
+
+			ret_thread = pthread_create(&thread_datas[thread_i].thread, NULL, restore_bo_contents,
+						    (void *)&thread_datas[thread_i]);
+			if (ret_thread) {
+				pr_err("Failed to create thread[%i] ret:%d\n", thread_i, ret_thread);
+				ret = -ret_thread;
+				goto exit;
+			}
+			thread_i++;
 		}
 
-		ret_thread = pthread_create(&thread_datas[thread_i].thread, NULL, restore_bo_contents,
-					    (void *)&thread_datas[thread_i]);
-		if (ret_thread) {
-			pr_err("Failed to create thread[%i] ret:%d\n", thread_i, ret_thread);
-			ret = -ret_thread;
-			goto exit;
-		}
-		thread_i++;
-	}
+		for (int i = 0; i < e->num_of_gpus; i++) {
+			pthread_join(thread_datas[i].thread, NULL);
+			pr_info("Thread[0x%x] finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret);
 
-	for (int i = 0; i < e->num_of_gpus; i++) {
-		pthread_join(thread_datas[i].thread, NULL);
-		pr_info("Thread[0x%x] finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret);
-
-		if (thread_datas[i].ret) {
-			ret = thread_datas[i].ret;
-			goto exit;
+			if (thread_datas[i].ret) {
+				ret = thread_datas[i].ret;
+				goto exit;
+			}
 		}
 	}
 exit:
@@ -1653,12 +1796,12 @@ exit:
 		if (bo_buckets[i].dmabuf_fd != KFD_INVALID_FD)
 			close(bo_buckets[i].dmabuf_fd);
 	}
-
-	xfree(thread_datas);
+	if (thread_datas)
+		xfree(thread_datas);
 	return ret;
 }
 
-int amdgpu_plugin_restore_file(int id)
+int amdgpu_plugin_restore_file(int id, bool *retry_needed)
 {
 	int ret = 0, fd;
 	char img_path[PATH_MAX];
@@ -1669,7 +1812,12 @@ int amdgpu_plugin_restore_file(int id)
 	size_t img_size;
 	FILE *img_fp = NULL;
 
-	pr_info("amdgpu_plugin: Initialized kfd plugin restorer with ID = %d\n", id);
+	*retry_needed = false;
+
+	if (plugin_disabled)
+		return -ENOTSUP;
+
+	pr_info("Initialized kfd plugin restorer with ID = %d\n", id);
 
 	snprintf(img_path, sizeof(img_path), IMG_KFD_FILE, id);
 
@@ -1683,13 +1831,22 @@ int amdgpu_plugin_restore_file(int id)
 		 * TODO: Currently, this code will only work if this function is called for /dev/kfd
 		 * first as we assume restore_maps is already filled. Need to fix this later.
 		 */
-		snprintf(img_path, sizeof(img_path), IMG_RENDERD_FILE, id);
-		pr_info("Restoring RenderD %s\n", img_path);
+		snprintf(img_path, sizeof(img_path), IMG_DRM_FILE, id);
 
 		img_fp = open_img_file(img_path, false, &img_size);
-		if (!img_fp)
-			return -EINVAL;
-
+		if (!img_fp) {
+			ret = amdgpu_plugin_dmabuf_restore(id);
+			if (ret == 1) {
+				/* This is a dmabuf fd, but the corresponding buffer object that was
+				 * exported to make it has not yet been restored. Need to try again
+				 * later when the buffer object exists, so it can be re-exported.
+				 */
+				*retry_needed = true;
+				return 0;
+			}
+			return ret;
+		}
+		pr_info("Restoring RenderD %s\n", img_path);
 		pr_debug("RenderD Image file size:%ld\n", img_size);
 		buf = xmalloc(img_size);
 		if (!buf) {
@@ -1713,7 +1870,7 @@ int amdgpu_plugin_restore_file(int id)
 		}
 		fclose(img_fp);
 
-		pr_info("amdgpu_plugin: render node gpu_id = 0x%04x\n", rd->gpu_id);
+		pr_info("render node gpu_id = 0x%04x\n", rd->gpu_id);
 
 		target_gpu_id = maps_get_dest_gpu(&restore_maps, rd->gpu_id);
 		if (!target_gpu_id) {
@@ -1727,11 +1884,21 @@ int amdgpu_plugin_restore_file(int id)
 			goto fail;
 		}
 
-		pr_info("amdgpu_plugin: render node destination gpu_id = 0x%04x\n", tp_node->gpu_id);
+		pr_info("render node destination gpu_id = 0x%04x\n", tp_node->gpu_id);
 
 		fd = node_get_drm_render_device(tp_node);
-		if (fd < 0)
-			pr_err("amdgpu_plugin: Failed to open render device (minor:%d)\n", tp_node->drm_render_minor);
+		if (fd < 0) {
+			pr_err("Failed to open render device (minor:%d)\n", tp_node->drm_render_minor);
+			return -1;
+		}
+
+		ret = amdgpu_plugin_drm_restore_file(fd, rd);
+		if (ret == 1)
+			*retry_needed = true;
+		if (ret < 0) {
+			fd = ret;
+			goto fail;
+		}
 	fail:
 		criu_render_node__free_unpacked(rd, NULL);
 		xfree(buf);
@@ -1743,7 +1910,20 @@ int amdgpu_plugin_restore_file(int id)
 		 * copy of the fd. CRIU core owns the duplicated returned fd, and amdgpu_plugin owns the fd stored in
 		 * tp_node.
 		 */
-		return dup(fd);
+
+		if (fd < 0)
+			return fd;
+
+		if (!(*retry_needed)) {
+			fd = dup(fd);
+			if (fd == -1) {
+				pr_perror("unable to duplicate the render fd");
+				return -1;
+			}
+			return fd;
+		}
+
+		return 0;
 	}
 
 	fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC);
@@ -1752,7 +1932,7 @@ int amdgpu_plugin_restore_file(int id)
 		return -1;
 	}
 
-	pr_info("amdgpu_plugin: Opened kfd, fd = %d\n", fd);
+	pr_info("Opened kfd, fd = %d\n", fd);
 
 	if (!kernel_supports_criu(fd))
 		return -ENOTSUP;
@@ -1780,18 +1960,20 @@ int amdgpu_plugin_restore_file(int id)
 		return -1;
 	}
 
-	plugin_log_msg("amdgpu_plugin: read image file data\n");
+	plugin_log_msg("read image file data\n");
 
 	/*
 	 * Initialize fd_next to be 1 greater than the biggest file descriptor in use by the target restore process.
 	 * This way, we know that the file descriptors we store will not conflict with file descriptors inside core
 	 * CRIU.
 	 */
-	fd_next = find_unused_fd_pid(e->pid);
-	if (fd_next <= 0) {
-		pr_err("Failed to find unused fd (fd:%d)\n", fd_next);
-		ret = -EINVAL;
-		goto exit;
+	if (fd_next == -1) {
+		fd_next = find_unused_fd_pid(e->pid);
+		if (fd_next <= 0) {
+			pr_err("Failed to find unused fd (fd:%d)\n", fd_next);
+			ret = -EINVAL;
+			goto exit;
+		}
 	}
 
 	ret = devinfo_to_topology(e->device_entries, e->num_of_gpus + e->num_of_cpus, &src_topology);
@@ -1824,14 +2006,26 @@ int amdgpu_plugin_restore_file(int id)
 	args.num_objects = e->num_of_objects;
 	args.priv_data_size = e->priv_data.len;
 	args.priv_data = (uintptr_t)e->priv_data.data;
-
 	args.op = KFD_CRIU_OP_RESTORE;
+
 	if (kmtIoctl(fd, AMDKFD_IOC_CRIU_OP, &args) == -1) {
 		pr_perror("Restore ioctl failed");
 		ret = -1;
 		goto exit;
 	}
 
+	if (ret < 0)
+		goto exit;
+
+	for (int i = 0; i < args.num_bos; i++) {
+		struct kfd_criu_bo_bucket *bo_bucket = &((struct kfd_criu_bo_bucket *)args.bos)[i];
+		KfdBoEntry *bo_entry = e->bo_entries[i];
+
+		if (bo_entry->handle != -1) {
+			store_dmabuf_fd(bo_entry->handle, bo_bucket->dmabuf_fd);
+		}
+	}
+
 	ret = restore_bo_data(id, (struct kfd_criu_bo_bucket *)args.bos, e);
 	if (ret)
 		goto exit;
@@ -1847,10 +2041,10 @@ exit:
 	xfree(buf);
 
 	if (ret) {
-		pr_err("amdgpu_plugin: Failed to restore (ret:%d)\n", ret);
+		pr_err("Failed to restore (ret:%d)\n", ret);
 		fd = ret;
 	} else {
-		pr_info("amdgpu_plugin: Restore successful (fd:%d)\n", fd);
+		pr_info("Restore successful (fd:%d)\n", fd);
 	}
 
 	return fd;
@@ -1870,7 +2064,10 @@ int amdgpu_plugin_update_vmamap(const char *in_path, const uint64_t addr, const
 	char *p_end;
 	bool is_kfd = false, is_renderD = false;
 
-	plugin_log_msg("amdgpu_plugin: Enter %s\n", __func__);
+	if (plugin_disabled)
+		return -ENOTSUP;
+
+	plugin_log_msg("Enter %s\n", __func__);
 
 	strncpy(path, in_path, sizeof(path));
 
@@ -1903,13 +2100,18 @@ int amdgpu_plugin_update_vmamap(const char *in_path, const uint64_t addr, const
 		if (addr == vma_md->vma_entry && old_offset == vma_md->old_pgoff) {
 			*new_offset = vma_md->new_pgoff;
 
-			if (is_renderD)
-				*updated_fd = vma_md->fd;
-			else
-				*updated_fd = -1;
+			*updated_fd = -1;
+			if (is_renderD) {
+				int fd = dup(vma_md->fd);
+				if (fd == -1) {
+					pr_perror("unable to duplicate the render fd");
+					return -1;
+				}
+				*updated_fd = fd;
+			}
 
-			plugin_log_msg("amdgpu_plugin: old_pgoff=0x%lx new_pgoff=0x%lx fd=%d\n", vma_md->old_pgoff,
-				       vma_md->new_pgoff, *updated_fd);
+			plugin_log_msg("old_pgoff=0x%lx new_pgoff=0x%lx fd=%d\n", vma_md->old_pgoff, vma_md->new_pgoff,
+				       *updated_fd);
 
 			return 1;
 		}
@@ -1922,26 +2124,290 @@ CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__UPDATE_VMA_MAP, amdgpu_plugin_update_vma
 int amdgpu_plugin_resume_devices_late(int target_pid)
 {
 	struct kfd_ioctl_criu_args args = { 0 };
-	int fd, ret = 0;
+	int fd, exit_code = 0;
 
-	pr_info("amdgpu_plugin: Inside %s for target pid = %d\n", __func__, target_pid);
+	if (plugin_disabled)
+		return -ENOTSUP;
+
+	if (!parallel_disabled) {
+		pr_info("Close parallel restore server\n");
+		if (close_parallel_restore_server()) {
+			pr_err("Close parallel restore server fail\n");
+			return -1;
+		}
+
+		exit_code = pthread_join(parallel_thread, NULL);
+		if (exit_code) {
+			pr_err("Failed to join parallel thread ret:%d\n", exit_code);
+			return -1;
+		}
+		if (parallel_thread_result) {
+			pr_err("Parallel restore fail\n");
+			return parallel_thread_result;
+		}
+	}
+
+	pr_info("Inside %s for target pid = %d\n", __func__, target_pid);
 
 	fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC);
 	if (fd < 0) {
 		pr_perror("failed to open kfd in plugin");
-		return -1;
+		return -ENOTSUP;
 	}
 
 	args.pid = target_pid;
 	args.op = KFD_CRIU_OP_RESUME;
-	pr_info("amdgpu_plugin: Calling IOCTL to start notifiers and queues\n");
+	pr_info("Calling IOCTL to start notifiers and queues\n");
 	if (kmtIoctl(fd, AMDKFD_IOC_CRIU_OP, &args) == -1) {
-		pr_perror("restore late ioctl failed");
-		ret = -1;
+		if (errno == ESRCH) {
+			pr_info("Pid %d has no kfd process info\n", target_pid);
+			exit_code = -ENOTSUP;
+		} else {
+			pr_perror("restore late ioctl failed");
+			exit_code = -1;
+		}
 	}
 
+	clear_restore_state();
+
 	close(fd);
-	return ret;
+	return exit_code;
 }
 
 CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, amdgpu_plugin_resume_devices_late)
+
+/*
+ * Open render node @dev_minor, create a libdrm device handle in @h_dev and store in
+ * @max_copy_size the SDMA copy limit picked for the GPU family. Returns 0 or an error.
+ */
+int init_dev(int dev_minor, amdgpu_device_handle *h_dev, uint64_t *max_copy_size)
+{
+	struct amdgpu_gpu_info gpu_info = { 0 };
+	uint32_t major, minor;
+	int drm_fd, ret;
+
+	drm_fd = open_drm_render_device(dev_minor);
+	if (drm_fd < 0)
+		return drm_fd;
+
+	ret = amdgpu_device_initialize(drm_fd, &major, &minor, h_dev);
+	if (ret) {
+		/* No handle was created, so there is nothing to deinitialize. */
+		pr_perror("Failed to initialize device");
+		return ret;
+	}
+
+	ret = amdgpu_query_gpu_info(*h_dev, &gpu_info);
+	if (ret) {
+		pr_perror("failed to query gpuinfo via libdrm");
+		amdgpu_device_deinitialize(*h_dev);
+		return ret;
+	}
+	*max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ? SDMA_LINEAR_COPY_MAX_SIZE :
+								    SDMA_LINEAR_COPY_MAX_SIZE - 1;
+	return 0;
+}
+
+/* Open the BO pages image for (@id, @gpu_id); NULL if it cannot be opened or is not @tot_size bytes. */
+FILE *get_bo_contents_fp(int id, int gpu_id, size_t tot_size)
+{
+	size_t actual_size = 0;
+	char path[PATH_MAX];
+	FILE *fp;
+
+	snprintf(path, sizeof(path), IMG_KFD_PAGES_FILE, id, gpu_id);
+	fp = open_img_file(path, false, &actual_size);
+	if (fp == NULL) {
+		pr_perror("Cannot fopen %s", path);
+		return NULL;
+	}
+	if (actual_size == tot_size)
+		return fp;
+
+	pr_err("%s size mismatch (current:%ld:expected:%ld)\n", path, actual_size, tot_size);
+	fclose(fp);
+	return NULL;
+}
+
+struct parallel_thread_data { /* per-GPU argument and result of parallel_restore_bo_contents() */
+	pthread_t thread; /* thread handle, joined by restore_device_parallel_worker() */
+	uint32_t gpu_id; /* only restore_cmd entries with this gpu_id are handled */
+	int minor; /* render minor handed to init_dev() */
+	parallel_restore_cmd *restore_cmd; /* command shared by all threads of one batch */
+	int ret; /* status written by the thread before it returns */
+};
+
+/*
+ * Thread entry point: sDMA-copy the contents of every BO that the restore
+ * command lists for thread_data->gpu_id; status goes to thread_data->ret.
+ */
+void *parallel_restore_bo_contents(void *_thread_data)
+{
+	struct parallel_thread_data *thread_data = (struct parallel_thread_data *)_thread_data;
+	parallel_restore_cmd *restore_cmd = thread_data->restore_cmd;
+	size_t total_bo_size = 0, max_bo_size = 0, buffer_size = 0;
+	FILE *bo_contents_fp = NULL;
+	amdgpu_device_handle h_dev;
+	uint64_t max_copy_size;
+	void *buffer = NULL;
+	long offset = 0;
+	int ret;
+
+	ret = init_dev(thread_data->minor, &h_dev, &max_copy_size);
+	if (ret)
+		goto err;
+
+	for (int i = 0; i < restore_cmd->cmd_head.entry_num; i++) {
+		if (restore_cmd->entries[i].gpu_id == thread_data->gpu_id) {
+			total_bo_size += restore_cmd->entries[i].size;
+			max_bo_size = max(restore_cmd->entries[i].size, max_bo_size);
+		}
+	}
+	buffer_size = kfd_max_buffer_size > 0 ? min(kfd_max_buffer_size, max_bo_size) : max_bo_size;
+
+	bo_contents_fp = get_bo_contents_fp(restore_cmd->cmd_head.id, thread_data->gpu_id, total_bo_size);
+	if (bo_contents_fp == NULL) {
+		ret = -1;
+		goto err_sdma;
+	}
+	offset = ftell(bo_contents_fp);
+
+	/* posix_memalign() reports failure via its return value and does not set errno. */
+	ret = posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), buffer_size);
+	if (ret || !buffer) {
+		pr_err("Failed to alloc aligned memory. Consider setting KFD_MAX_BUFFER_SIZE.\n");
+		buffer = NULL;
+		ret = -ENOMEM;
+		goto err_sdma;
+	}
+
+	for (int i = 0; i < restore_cmd->cmd_head.entry_num; i++) {
+		parallel_restore_entry *entry = &restore_cmd->entries[i];
+
+		if (entry->gpu_id != thread_data->gpu_id)
+			continue;
+
+		fseeko(bo_contents_fp, entry->read_offset + offset, SEEK_SET);
+		ret = sdma_copy_bo(restore_cmd->fds_write[entry->write_id], entry->size, bo_contents_fp, buffer,
+				   buffer_size, h_dev, max_copy_size, SDMA_OP_VRAM_WRITE, false);
+		if (ret) {
+			pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i);
+			goto err_sdma;
+		}
+	}
+err_sdma:
+	if (bo_contents_fp)
+		fclose(bo_contents_fp);
+	xfree(buffer);
+	amdgpu_device_deinitialize(h_dev);
+err:
+	thread_data->ret = ret;
+	return NULL;
+}
+
+/*
+ * Background worker: for each parallel restore command received, run one
+ * parallel_restore_bo_contents() thread per GPU. The int behind @arg gets 0
+ * once recv_parallel_restore_cmd() returns 1, else the first error seen.
+ */
+void *restore_device_parallel_worker(void *arg)
+{
+	int *result = arg;
+
+	for (;;) {
+		struct parallel_thread_data *workers = NULL;
+		parallel_restore_cmd restore_cmd = { 0 };
+		int failed = 0, started = 0, ret;
+
+		ret = recv_parallel_restore_cmd(&restore_cmd);
+		if (ret == 1) {
+			*result = 0;
+			return NULL;
+		}
+		if (ret)
+			goto out;
+
+		workers = xzalloc(sizeof(*workers) * restore_cmd.cmd_head.gpu_num);
+		if (!workers) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		while (started < restore_cmd.cmd_head.gpu_num) {
+			struct parallel_thread_data *w = &workers[started];
+
+			w->gpu_id = restore_cmd.gpu_ids[started].gpu_id;
+			w->minor = restore_cmd.gpu_ids[started].minor;
+			w->restore_cmd = &restore_cmd;
+
+			ret = pthread_create(&w->thread, NULL, parallel_restore_bo_contents, (void *)w);
+			if (ret) {
+				pr_err("Failed to create thread[0x%x] ret:%d\n", w->gpu_id, ret);
+				failed = 1;
+				break;
+			}
+			started++;
+		}
+
+		/* Always join what was started; only the first failure is kept in ret. */
+		for (int i = 0; i < started; i++) {
+			int join_ret = pthread_join(workers[i].thread, NULL);
+
+			if (join_ret != 0) {
+				pr_err("pthread_join failed for Thread[0x%x] ret:%d\n", workers[i].gpu_id, join_ret);
+				if (!failed) {
+					ret = join_ret;
+					failed = 1;
+				}
+			}
+			pr_info("Thread[0x%x] finished ret:%d\n", workers[i].gpu_id, workers[i].ret);
+			if (workers[i].ret && !failed) {
+				ret = workers[i].ret;
+				failed = 1;
+			}
+		}
+		xfree(workers);
+out:
+		free_parallel_restore_cmd(&restore_cmd);
+		if (ret) {
+			*result = ret;
+			return NULL;
+		}
+	}
+}
+
+/*
+ * Create a thread running @f(@arg) with SIGCHLD blocked, so that main-thread code
+ * which blocks SIGCHLD itself (e.g. stop_cgroupd) is not disturbed by it. The new
+ * thread inherits the mask set here; the caller's own mask is restored afterwards,
+ * including when pthread_create() fails. Returns 0 on success, -1 on failure.
+ */
+static int back_thread_create(pthread_t *newthread, void *(*f)(void *), void *arg)
+{
+	sigset_t blockmask, oldmask;
+	int ret;
+
+	sigemptyset(&blockmask);
+	sigaddset(&blockmask, SIGCHLD);
+	sigprocmask(SIG_BLOCK, &blockmask, &oldmask);
+
+	ret = pthread_create(newthread, NULL, f, arg);
+	sigprocmask(SIG_SETMASK, &oldmask, NULL);
+	if (ret) {
+		pr_err("Create worker thread fail: %d\n", ret);
+		return -1;
+	}
+	return 0;
+}
+
+/* POST_FORKING hook: start the background parallel restore worker unless it is disabled. */
+int amdgpu_plugin_post_forking(void)
+{
+	if (plugin_disabled)
+		return -ENOTSUP;
+	if (!parallel_disabled)
+		return back_thread_create(&parallel_thread, restore_device_parallel_worker, &parallel_thread_result);
+
+	return 0;
+}
+CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__POST_FORKING, amdgpu_plugin_post_forking)
diff --git a/plugins/amdgpu/amdgpu_plugin_dmabuf.c b/plugins/amdgpu/amdgpu_plugin_dmabuf.c
new file mode 100644
index 000000000..11c9792e3
--- /dev/null
+++ b/plugins/amdgpu/amdgpu_plugin_dmabuf.c
@@ -0,0 +1,197 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <linux/limits.h>
+
+#include "common/list.h"
+#include "criu-amdgpu.pb-c.h"
+
+#include "xmalloc.h"
+#include "criu-log.h"
+#include "amdgpu_plugin_drm.h"
+#include "amdgpu_plugin_util.h"
+#include "amdgpu_plugin_dmabuf.h"
+#include "fdstore.h"
+
+#include "util.h"
+#include "common/scm.h"
+
+struct dmabuf { /* a dmabuf fd whose dump was deferred by amdgpu_plugin_dmabuf_dump() */
+	int id; /* id the dump was requested with */
+	int dmabuf_fd; /* fd retried by try_dump_dmabuf_list() */
+	struct list_head node; /* link in dmabuf_list */
+};
+
+static LIST_HEAD(dmabuf_list);
+
+/*
+ * Tell whether @fd is a dmabuf by comparing its link path with DMABUF_LINK.
+ * Returns 0 for a dmabuf, 1 for anything else and -1 if the link cannot be read.
+ * @st is currently unused.
+ */
+int get_dmabuf_info(int fd, struct stat *st)
+{
+	char target[PATH_MAX];
+
+	if (read_fd_link(fd, target, sizeof(target)) < 0)
+		return -1;
+	return strncmp(target, DMABUF_LINK, strlen(DMABUF_LINK)) ? 1 : 0;
+}
+
+/*
+ * Write the image for dmabuf @dmabuf_fd under @id: a packed CriuDmabufNode that
+ * holds the GEM handle handle_for_shared_bo_fd() reports for the fd.
+ *
+ * Returns -EAGAIN when no handle is known for the fd yet, so that the caller
+ * can retry later, -ENOMEM when the packing buffer cannot be allocated, and
+ * the result of write_img_file() otherwise.
+ */
+int __amdgpu_plugin_dmabuf_dump(int dmabuf_fd, int id)
+{
+	CriuDmabufNode node;
+	unsigned char *buf;
+	char path[PATH_MAX];
+	int gem_handle;
+	size_t len;
+	int ret;
+
+	gem_handle = handle_for_shared_bo_fd(dmabuf_fd);
+	if (gem_handle < 0) {
+		pr_err("Failed to get handle for dmabuf_fd = %d\n", dmabuf_fd);
+		return -EAGAIN; /* Retry needed */
+	}
+
+	/*
+	 * Only gem_handle is filled in, so the message is built on the stack;
+	 * a negative handle has already been rejected above.
+	 */
+	criu_dmabuf_node__init(&node);
+	node.gem_handle = gem_handle;
+
+	/* Serialize metadata to a file */
+	snprintf(path, sizeof(path), IMG_DMABUF_FILE, id);
+	len = criu_dmabuf_node__get_packed_size(&node);
+	buf = xmalloc(len);
+	if (!buf) {
+		pr_err("Failed to allocate buffer for dmabuf metadata\n");
+		return -ENOMEM;
+	}
+	criu_dmabuf_node__pack(&node, buf);
+
+	ret = write_img_file(path, buf, len);
+
+	xfree(buf);
+	return ret;
+}
+
+/*
+ * Restore the dmabuf fd dumped under @id: read its CriuDmabufNode image, map
+ * the GEM handle to an fdstore id and fetch the fd from the fdstore.
+ *
+ * Returns the fd on success, 1 when the handle or its fd is not available yet
+ * (retry later), or a negative value on error.
+ */
+int amdgpu_plugin_dmabuf_restore(int id)
+{
+	char path[PATH_MAX];
+	size_t img_size;
+	FILE *img_fp = NULL;
+	CriuDmabufNode *rd = NULL;
+	unsigned char *buf = NULL;
+	int ret, fd_id, dmabuf_fd;
+
+	snprintf(path, sizeof(path), IMG_DMABUF_FILE, id);
+
+	/* Read serialized metadata */
+	img_fp = open_img_file(path, false, &img_size);
+	if (!img_fp) {
+		pr_err("Failed to open dmabuf metadata file: %s\n", path);
+		return -EINVAL;
+	}
+
+	pr_debug("dmabuf Image file size:%ld\n", img_size);
+	buf = xmalloc(img_size);
+	if (!buf) {
+		pr_perror("Failed to allocate memory");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = read_fp(img_fp, buf, img_size);
+	if (ret) {
+		pr_perror("Unable to read from %s", path);
+		goto out;
+	}
+
+	rd = criu_dmabuf_node__unpack(NULL, img_size, buf);
+	if (rd == NULL) {
+		pr_perror("Unable to parse the dmabuf message %d", id);
+		ret = -1;
+		goto out;
+	}
+
+	/* Match GEM handle with shared_dmabuf list */
+	fd_id = amdgpu_id_for_handle(rd->gem_handle);
+	dmabuf_fd = (fd_id == -1) ? -1 : fdstore_get(fd_id);
+	if (dmabuf_fd == -1) {
+		pr_err("Failed to find dmabuf_fd for GEM handle = %d\n", rd->gem_handle);
+		ret = 1; /* Retry needed */
+		goto out;
+	}
+	pr_info("Restored dmabuf_fd = %d for GEM handle = %d\n", dmabuf_fd, rd->gem_handle);
+	ret = dmabuf_fd;
+out:
+	if (rd)
+		criu_dmabuf_node__free_unpacked(rd, NULL);
+	xfree(buf);
+	fclose(img_fp);
+	return ret;
+}
+
+/* Dump dmabuf @dmabuf_fd now; on -EAGAIN queue it on dmabuf_list for a later retry and return 0. */
+int amdgpu_plugin_dmabuf_dump(int dmabuf_fd, int id)
+{
+	int ret = __amdgpu_plugin_dmabuf_dump(dmabuf_fd, id);
+	if (ret == -EAGAIN) {
+		struct dmabuf *b = xmalloc(sizeof(*b));
+		if (!b) /* cannot remember the fd for a retry, so fail the dump */
+			return -ENOMEM;
+		*b = (struct dmabuf){ .id = id, .dmabuf_fd = dmabuf_fd };
+		list_add(&b->node, &dmabuf_list);
+		return 0;
+	}
+	return ret;
+}
+
+int try_dump_dmabuf_list() /* retry every dump queued on dmabuf_list; 0 or the first hard error */
+{
+	struct dmabuf *b, *t;
+	list_for_each_entry_safe(b, t, &dmabuf_list, node) {
+		int ret = __amdgpu_plugin_dmabuf_dump(b->dmabuf_fd, b->id);
+		if (ret == -EAGAIN) /* still not dumpable: leave the entry queued */
+			continue;
+		if (ret) /* any other failure stops the walk; later entries stay queued */
+			return ret;
+		list_del(&b->node); /* dumped: the entry is no longer needed */
+		xfree(b);
+	}
+	return 0;
+}
+
+/* Return -1 (and log) if dmabuf_list still holds entries that were never dumped, else 0. */
+int post_dump_dmabuf_check()
+{
+	if (list_empty(&dmabuf_list))
+		return 0;
+	pr_err("Not all dma buffers have been dumped\n");
+	return -1;
+}
diff --git a/plugins/amdgpu/amdgpu_plugin_dmabuf.h b/plugins/amdgpu/amdgpu_plugin_dmabuf.h
new file mode 100644
index 000000000..f07af7ee0
--- /dev/null
+++ b/plugins/amdgpu/amdgpu_plugin_dmabuf.h
@@ -0,0 +1,16 @@
+
+#ifndef __AMDGPU_PLUGIN_DMABUF_H__
+#define __AMDGPU_PLUGIN_DMABUF_H__
+
+#include "amdgpu_plugin_util.h"
+#include "criu-amdgpu.pb-c.h"
+
+int amdgpu_plugin_dmabuf_dump(int fd, int id);
+int amdgpu_plugin_dmabuf_restore(int id);
+
+int try_dump_dmabuf_list();
+int post_dump_dmabuf_check();
+
+int get_dmabuf_info(int fd, struct stat *st);
+
+#endif /* __AMDGPU_PLUGIN_DMABUF_H__ */
\ No newline at end of file
diff --git a/plugins/amdgpu/amdgpu_plugin_drm.c b/plugins/amdgpu/amdgpu_plugin_drm.c
new file mode 100644
index 000000000..3520bca7a
--- /dev/null
+++ b/plugins/amdgpu/amdgpu_plugin_drm.c
@@ -0,0 +1,569 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <semaphore.h>
+
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <sys/sysmacros.h>
+#include <linux/limits.h>
+
+#include <dirent.h>
+#include "common/list.h"
+#include "files.h"
+#include "fdstore.h"
+
+#include "criu-amdgpu.pb-c.h"
+
+/* Define __user as empty for kernel headers in user-space */
+#define __user
+#include "drm.h"
+
+#include <xf86drm.h>
+#include <libdrm/amdgpu.h>
+
+#include "xmalloc.h"
+#include "amdgpu_drm.h"
+#include "amdgpu_plugin_drm.h"
+#include "amdgpu_plugin_util.h"
+#include "amdgpu_plugin_topology.h"
+
+#include "util.h"
+#include "common/scm.h"
+
+/*
+ * Translate a dmabuf fd into a GEM handle on the DRM fd behind h_dev.
+ * Returns the handle, or -1 when dmabuf_fd is -1 or the conversion fails.
+ */
+int get_gem_handle(amdgpu_device_handle h_dev, int dmabuf_fd)
+{
+	int drm_fd = amdgpu_device_get_fd(h_dev);
+	uint32_t gem_handle;
+
+	if (dmabuf_fd == -1 || drmPrimeFDToHandle(drm_fd, dmabuf_fd, &gem_handle))
+		return -1;
+
+	return gem_handle;
+}
+
+/*
+ * ioctl() wrapper that re-issues the call while it fails with EINTR or
+ * EAGAIN, for at most 200 retries after the first attempt. The retry budget
+ * is only consumed when a call actually fails. An EBADF failure is logged.
+ *
+ * Returns the value of the last ioctl() call.
+ *
+ * NOTE(review): the EBADF message talks about the KFD descriptor although
+ * every caller in this file passes a DRM fd - confirm the wording.
+ */
+int drmIoctl(int fd, unsigned long request, void *arg)
+{
+	int ret, max_retries = 200;
+
+	do {
+		ret = ioctl(fd, request, arg);
+	} while (ret == -1 && max_retries-- > 0 && (errno == EINTR || errno == EAGAIN));
+
+	if (ret == -1 && errno == EBADF)
+		/* In case pthread_atfork didn't catch it, this will
+		 * make any subsequent hsaKmt calls fail in CHECK_KFD_OPEN.
+		 */
+		pr_perror("KFD file descriptor not valid in this process");
+	return ret;
+}
+
+/*
+ * Allocate and initialise num_bos DrmBoEntry messages and attach them to e.
+ *
+ * e->n_bo_entries is bumped once per entry that was fully set up, so after a
+ * partial failure it still counts exactly the entries that need freeing.
+ * A count of zero (or less) allocates nothing and succeeds, so the result
+ * does not depend on what the allocator returns for a zero-byte request.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure.
+ */
+static int allocate_bo_entries(CriuRenderNode *e, int num_bos)
+{
+	if (num_bos <= 0)
+		return 0;
+
+	e->bo_entries = xmalloc(sizeof(DrmBoEntry *) * num_bos);
+	if (!e->bo_entries) {
+		pr_err("Failed to allocate bo_info\n");
+		return -ENOMEM;
+	}
+
+	for (int i = 0; i < num_bos; i++) {
+		DrmBoEntry *entry = xzalloc(sizeof(*entry));
+
+		if (!entry) {
+			pr_err("Failed to allocate bo entry %d\n", i);
+			return -ENOMEM;
+		}
+
+		drm_bo_entry__init(entry);
+
+		e->bo_entries[i] = entry;
+		e->n_bo_entries++;
+	}
+	return 0;
+}
+
+/*
+ * Allocate and initialise num_vms DrmVmEntry messages and attach them to the
+ * BO entry e.
+ *
+ * e->n_vm_entries is bumped once per entry that was fully set up. A count of
+ * zero (or less) allocates nothing and succeeds, so a BO without mappings
+ * does not depend on what the allocator returns for a zero-byte request.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure.
+ */
+static int allocate_vm_entries(DrmBoEntry *e, int num_vms)
+{
+	if (num_vms <= 0)
+		return 0;
+
+	e->vm_entries = xmalloc(sizeof(DrmVmEntry *) * num_vms);
+	if (!e->vm_entries) {
+		pr_err("Failed to allocate vm_info\n");
+		return -ENOMEM;
+	}
+
+	for (int i = 0; i < num_vms; i++) {
+		DrmVmEntry *entry = xzalloc(sizeof(*entry));
+
+		if (!entry) {
+			pr_err("Failed to allocate vm entry %d\n", i);
+			return -ENOMEM;
+		}
+
+		drm_vm_entry__init(entry);
+
+		e->vm_entries[i] = entry;
+		e->n_vm_entries++;
+	}
+	return 0;
+}
+
+/*
+ * Release a CriuRenderNode built by amdgpu_plugin_drm_dump_file(): every VM
+ * entry, each BO entry and its VM pointer array, the BO pointer array and
+ * finally the node itself. A NULL node is accepted so error paths can call
+ * this unconditionally.
+ */
+static void free_e(CriuRenderNode *e)
+{
+	if (!e)
+		return;
+
+	for (size_t i = 0; i < e->n_bo_entries; i++) {
+		DrmBoEntry *bo = e->bo_entries[i];
+
+		if (!bo)
+			continue;
+
+		for (size_t j = 0; j < bo->n_vm_entries; j++)
+			xfree(bo->vm_entries[j]);
+		xfree(bo->vm_entries);
+		xfree(bo);
+	}
+
+	xfree(e->bo_entries);
+	xfree(e);
+}
+
+/*
+ * Check that the device described by st is an amdgpu DRM render node: same
+ * major as the first render node, and a minor between that node's minor and
+ * DRM_LAST_RENDER_NODE. The fd argument is not used here.
+ *
+ * Returns 0 when supported, -ENOTSUP when not, or the stat() result (-1)
+ * when the first render node cannot be inspected.
+ */
+int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *st)
+{
+	struct stat first_node;
+	char node_path[PATH_MAX];
+	int err;
+
+	snprintf(node_path, sizeof(node_path), AMDGPU_DRM_DEVICE, DRM_FIRST_RENDER_NODE);
+	err = stat(node_path, &first_node);
+	if (err == -1) {
+		pr_err("Error in getting stat for: %s\n", node_path);
+		return err;
+	}
+
+	if (major(st->st_rdev) != major(first_node.st_rdev))
+		goto unsupported;
+	if (minor(st->st_rdev) < minor(first_node.st_rdev))
+		goto unsupported;
+	if (minor(st->st_rdev) > DRM_LAST_RENDER_NODE)
+		goto unsupported;
+
+	pr_debug("AMD DRI(maj,min) = %d:%d VMA Device FD(maj,min) = %d:%d\n",
+			 major(first_node.st_rdev), minor(first_node.st_rdev),
+			 major(st->st_rdev), minor(st->st_rdev));
+
+	return 0;
+
+unsupported:
+	pr_err("Can't handle VMA mapping of input device\n");
+	return -ENOTSUP;
+}
+
+/*
+ * Refill the contents of the restored BOs of one render node from their
+ * per-BO image files using sDMA.
+ *
+ * Only BOs whose preferred domain includes VRAM or GTT and that have at
+ * least one VM mapping are processed. A single page-aligned staging buffer
+ * sized for the largest such BO is shared by all copies.
+ *
+ * Every descriptor in dmabufs that is not KFD_INVALID_FD is closed before
+ * returning, on success and on failure alike.
+ *
+ * @param drm_render_minor render minor used to build the image file names
+ * @param rd               render node message describing the BOs
+ * @param drm_fd           DRM fd used to initialise libdrm
+ * @param dmabufs          one dmabuf fd per BO, indexed like rd->bo_entries
+ * @return 0 on success, non-zero on failure
+ */
+static int restore_bo_contents_drm(int drm_render_minor, CriuRenderNode *rd, int drm_fd, int *dmabufs)
+{
+	size_t image_size = 0, max_bo_size = 0, buffer_size;
+	struct amdgpu_gpu_info gpu_info = { 0 };
+	amdgpu_device_handle h_dev;
+	bool dev_initialized = false;
+	uint64_t max_copy_size;
+	uint32_t major, minor;
+	FILE *bo_contents_fp = NULL;
+	void *buffer = NULL;
+	/* Large enough for IMG_DRM_PAGES_FILE with three full-width ints. */
+	char img_path[64];
+	int i, ret = 0;
+
+	ret = amdgpu_device_initialize(drm_fd, &major, &minor, &h_dev);
+	if (ret) {
+		pr_perror("failed to initialize device");
+		goto exit;
+	}
+	dev_initialized = true;
+	plugin_log_msg("libdrm initialized successfully\n");
+
+	ret = amdgpu_query_gpu_info(h_dev, &gpu_info);
+	if (ret) {
+		pr_perror("failed to query gpuinfo via libdrm");
+		goto exit;
+	}
+
+	max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ? SDMA_LINEAR_COPY_MAX_SIZE :
+								   SDMA_LINEAR_COPY_MAX_SIZE - 1;
+
+	for (i = 0; i < rd->num_of_bos; i++) {
+		if (rd->bo_entries[i]->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
+			if (rd->bo_entries[i]->size > max_bo_size)
+				max_bo_size = rd->bo_entries[i]->size;
+		}
+	}
+
+	buffer_size = max_bo_size;
+
+	/* posix_memalign() reports failure through its return value, not errno. */
+	ret = posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), buffer_size);
+	if (ret || !buffer) {
+		errno = ret ? ret : ENOMEM;
+		pr_perror("Failed to alloc aligned memory. Consider setting KFD_MAX_BUFFER_SIZE.");
+		buffer = NULL;
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	for (i = 0; i < rd->num_of_bos; i++) {
+		if (!(rd->bo_entries[i]->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)))
+			continue;
+
+		if (rd->bo_entries[i]->num_of_vms == 0)
+			continue;
+
+		snprintf(img_path, sizeof(img_path), IMG_DRM_PAGES_FILE, rd->id, drm_render_minor, i);
+
+		bo_contents_fp = open_img_file(img_path, false, &image_size);
+		if (!bo_contents_fp) {
+			pr_err("Failed to open BO contents image %s\n", img_path);
+			ret = -EIO;
+			break;
+		}
+
+		ret = sdma_copy_bo(dmabufs[i], rd->bo_entries[i]->size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size,
+				   SDMA_OP_VRAM_WRITE, true);
+
+		/* Close the image on both outcomes so a failed copy does not leak it. */
+		fclose(bo_contents_fp);
+		bo_contents_fp = NULL;
+
+		if (ret) {
+			pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i);
+			break;
+		}
+		plugin_log_msg("** Successfully filled the BO using sDMA: bo_buckets[%d] **\n", i);
+	}
+
+exit:
+	for (i = 0; i < rd->num_of_bos; i++) {
+		if (dmabufs[i] != KFD_INVALID_FD)
+			close(dmabufs[i]);
+	}
+
+	xfree(buffer);
+
+	/* h_dev is only valid once amdgpu_device_initialize() has succeeded. */
+	if (dev_initialized)
+		amdgpu_device_deinitialize(h_dev);
+	return ret;
+}
+
+/*
+ * Dump one BO of a render node: fill rd->bo_entries[idx] from the kernel's
+ * handle listing and VM mapping info, then write the BO contents to its own
+ * image file using sDMA.
+ *
+ * Failing to export or copy the contents is fatal only for BOs whose
+ * contents are read back on restore (preferred domain includes VRAM or GTT
+ * and at least one VM mapping). For other BOs the failure is logged and
+ * ignored.
+ *
+ * Returns 0 on success, non-zero on failure. All temporary resources are
+ * released on every path.
+ */
+static int dump_drm_bo(int fd, CriuRenderNode *rd, int idx, const struct drm_amdgpu_gem_list_handles_entry *handle_entry)
+{
+	int num_vm_entries = 8;
+	struct drm_amdgpu_gem_vm_entry *vm_info_entries = NULL;
+	struct drm_amdgpu_gem_op vm_info_args = { 0 };
+	union drm_amdgpu_gem_mmap mmap_args = { 0 };
+	DrmBoEntry *boinfo = rd->bo_entries[idx];
+	amdgpu_device_handle h_dev;
+	bool dev_initialized = false;
+	bool contents_needed;
+	uint32_t major, minor;
+	FILE *bo_contents_fp = NULL;
+	void *buffer = NULL;
+	size_t image_size;
+	/* Large enough for IMG_DRM_PAGES_FILE with three full-width ints. */
+	char img_path[64];
+	int dmabuf_fd = -1;
+	int ret;
+
+	boinfo->size = handle_entry->size;
+
+	boinfo->alloc_flags = handle_entry->alloc_flags;
+	boinfo->preferred_domains = handle_entry->preferred_domains;
+	boinfo->alignment = handle_entry->alignment;
+	boinfo->handle = handle_entry->gem_handle;
+	boinfo->is_import = (handle_entry->flags & AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT) || shared_bo_has_exporter(boinfo->handle);
+
+	mmap_args.in.handle = boinfo->handle;
+
+	if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_MMAP, &mmap_args) == -1) {
+		pr_perror("Error Failed to call mmap ioctl");
+		return -1;
+	}
+
+	boinfo->offset = mmap_args.out.addr_ptr;
+
+	vm_info_entries = xzalloc(sizeof(struct drm_amdgpu_gem_vm_entry) * num_vm_entries);
+	if (!vm_info_entries)
+		return -ENOMEM;
+
+	vm_info_args.handle = handle_entry->gem_handle;
+	vm_info_args.num_entries = num_vm_entries;
+	vm_info_args.value = (uintptr_t)vm_info_entries;
+	vm_info_args.op = AMDGPU_GEM_OP_GET_MAPPING_INFO;
+	ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_OP, &vm_info_args);
+	if (ret) {
+		pr_perror("Failed to call vm info ioctl");
+		goto out;
+	}
+
+	if (vm_info_args.num_entries > num_vm_entries) {
+		/* The first guess was too small: retry with the reported count. */
+		num_vm_entries = vm_info_args.num_entries;
+		xfree(vm_info_entries);
+		vm_info_entries = xzalloc(sizeof(struct drm_amdgpu_gem_vm_entry) * num_vm_entries);
+		if (!vm_info_entries) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		vm_info_args.handle = handle_entry->gem_handle;
+		vm_info_args.num_entries = num_vm_entries;
+		vm_info_args.value = (uintptr_t)vm_info_entries;
+		vm_info_args.op = AMDGPU_GEM_OP_GET_MAPPING_INFO;
+		ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_OP, &vm_info_args);
+		if (ret) {
+			pr_perror("Failed to call vm info ioctl");
+			goto out;
+		}
+	} else {
+		num_vm_entries = vm_info_args.num_entries;
+	}
+
+	boinfo->num_of_vms = num_vm_entries;
+	ret = allocate_vm_entries(boinfo, num_vm_entries);
+	if (ret)
+		goto out;
+
+	for (int j = 0; j < num_vm_entries; j++) {
+		DrmVmEntry *vminfo = boinfo->vm_entries[j];
+
+		boinfo->addr = vm_info_entries[j].addr;
+		vminfo->addr = vm_info_entries[j].addr;
+		vminfo->size = vm_info_entries[j].size;
+		vminfo->offset = vm_info_entries[j].offset;
+		vminfo->flags = vm_info_entries[j].flags;
+	}
+
+	/* Same filter the restore side applies before reading the contents back. */
+	contents_needed = (boinfo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) &&
+			  num_vm_entries != 0;
+
+	ret = amdgpu_device_initialize(fd, &major, &minor, &h_dev);
+	if (ret) {
+		pr_err("Failed to initialize amdgpu device\n");
+		goto out;
+	}
+	dev_initialized = true;
+
+	/* open_img_file() stores *size as the header of the image file. */
+	image_size = handle_entry->size;
+	snprintf(img_path, sizeof(img_path), IMG_DRM_PAGES_FILE, rd->id, rd->drm_render_minor, idx);
+	bo_contents_fp = open_img_file(img_path, true, &image_size);
+	if (!bo_contents_fp) {
+		ret = -EIO;
+		goto out;
+	}
+
+	ret = posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), handle_entry->size);
+	if (ret) {
+		buffer = NULL;
+		ret = -ENOMEM;
+	} else if (drmPrimeHandleToFD(amdgpu_device_get_fd(h_dev), boinfo->handle, 0, &dmabuf_fd)) {
+		dmabuf_fd = -1;
+		ret = -1;
+	} else {
+		ret = sdma_copy_bo(dmabuf_fd, handle_entry->size, bo_contents_fp, buffer, handle_entry->size, h_dev, 0x1000,
+				   SDMA_OP_VRAM_READ, false);
+	}
+
+	if (ret) {
+		if (contents_needed) {
+			pr_err("Failed to dump the contents of BO %d\n", idx);
+			goto out;
+		}
+		pr_debug("Could not dump the contents of BO %d; they are not read back on restore\n", idx);
+		ret = 0;
+	}
+
+out:
+	if (dmabuf_fd >= 0)
+		close(dmabuf_fd);
+
+	if (bo_contents_fp)
+		fclose(bo_contents_fp);
+
+	xfree(buffer);
+
+	if (dev_initialized) {
+		int err = amdgpu_device_deinitialize(h_dev);
+
+		/* Keep the first error; only report deinit failure otherwise. */
+		if (!ret)
+			ret = err;
+	}
+
+	xfree(vm_info_entries);
+	return ret;
+}
+
+/*
+ * Serialize one DRM render node: list its GEM handles, dump every BO
+ * (metadata, VM mappings and contents), record the BOs in the shared-BO
+ * list, resolve the GPU id through the checkpoint maps and write the packed
+ * CriuRenderNode to the IMG_DRM_FILE image for id.
+ *
+ * If the kernel rejects the list-handles ioctl with EINVAL the node is
+ * dumped with zero BOs.
+ *
+ * @param fd  open DRM render node
+ * @param id  identifier used in the image file names
+ * @param drm stat of the render node; its minor selects the topology node
+ * @return 0 on success, non-zero on failure
+ */
+int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm)
+{
+	struct drm_amdgpu_gem_list_handles list_handles_args = { 0 };
+	struct drm_amdgpu_gem_list_handles_entry *list_handles_entries = NULL;
+	CriuRenderNode *rd = NULL;
+	struct tp_node *tp_node;
+	char path[PATH_MAX];
+	unsigned char *buf;
+	int num_bos;
+	int minor;
+	int len;
+	int ret;
+
+	rd = xmalloc(sizeof(*rd));
+	if (!rd)
+		return -ENOMEM;
+	criu_render_node__init(rd);
+
+	/* Get the topology node of the DRM device */
+	minor = minor(drm->st_rdev);
+	rd->drm_render_minor = minor;
+	rd->id = id;
+
+	num_bos = 8;
+	list_handles_entries = xzalloc(sizeof(struct drm_amdgpu_gem_list_handles_entry) * num_bos);
+	if (!list_handles_entries) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+	list_handles_args.num_entries = num_bos;
+	list_handles_args.entries = (uintptr_t)list_handles_entries;
+
+	ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES, &list_handles_args);
+	if (ret && errno == EINVAL) {
+		pr_info("This kernel appears not to have AMDGPU_GEM_LIST_HANDLES ioctl. Consider disabling Dmabuf IPC or updating your kernel.\n");
+		list_handles_args.num_entries = 0;
+	} else if (ret) {
+		pr_perror("Failed to call bo info ioctl");
+		goto exit;
+	}
+
+	if (list_handles_args.num_entries > num_bos) {
+		/* The first guess was too small: retry with the reported count. */
+		num_bos = list_handles_args.num_entries;
+		xfree(list_handles_entries);
+		list_handles_entries = xzalloc(sizeof(struct drm_amdgpu_gem_list_handles_entry) * num_bos);
+		if (!list_handles_entries) {
+			ret = -ENOMEM;
+			goto exit;
+		}
+		list_handles_args.num_entries = num_bos;
+		list_handles_args.entries = (uintptr_t)list_handles_entries;
+		ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES, &list_handles_args);
+		if (ret) {
+			pr_perror("Failed to call bo info ioctl");
+			goto exit;
+		}
+	} else {
+		num_bos = list_handles_args.num_entries;
+	}
+
+	rd->num_of_bos = num_bos;
+	ret = allocate_bo_entries(rd, num_bos);
+	if (ret)
+		goto exit;
+
+	for (int i = 0; i < num_bos; i++) {
+		ret = dump_drm_bo(fd, rd, i, &list_handles_entries[i]);
+		if (ret)
+			goto exit;
+	}
+
+	for (int i = 0; i < num_bos; i++) {
+		DrmBoEntry *boinfo = rd->bo_entries[i];
+
+		ret = record_shared_bo(boinfo->handle, boinfo->is_import);
+		if (ret)
+			goto exit;
+	}
+
+	tp_node = sys_get_node_by_render_minor(&src_topology, minor);
+	if (!tp_node) {
+		pr_err("Failed to find a device with minor number = %d\n", minor);
+		ret = -ENODEV;
+		goto exit;
+	}
+
+	/* Get the GPU_ID of the DRM device */
+	rd->gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id);
+	if (!rd->gpu_id) {
+		pr_err("Failed to find valid gpu_id for the device = %d\n", rd->gpu_id);
+		ret = -ENODEV;
+		goto exit;
+	}
+
+	len = criu_render_node__get_packed_size(rd);
+	buf = xmalloc(len);
+	if (!buf) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	criu_render_node__pack(rd, buf);
+
+	snprintf(path, sizeof(path), IMG_DRM_FILE, id);
+	ret = write_img_file(path, buf, len);
+
+	xfree(buf);
+exit:
+	xfree(list_handles_entries);
+	free_e(rd);
+	return ret;
+}
+
+/*
+ * Recreate the BOs of one render node on fd.
+ *
+ * For every BO not already recorded as completed: imported BOs are
+ * re-imported from the dmabuf kept in the fdstore, other BOs are created and
+ * exported as a dmabuf. The new GEM handle is then renamed to the
+ * checkpointed one, the VM mappings are replayed and the VMA offset update
+ * is saved. An imported BO whose exporter has no stored dmabuf yet is
+ * skipped and flagged for retry. Finally the BO contents are refilled by
+ * restore_bo_contents_drm(), which also closes the collected dmabuf fds.
+ *
+ * The per-BO dmabuf table starts out as KFD_INVALID_FD so that skipped BOs
+ * never cause an unrelated descriptor (fd 0) to be closed.
+ *
+ * @return negative on error, 1 if some BO still needs a retry, 0 otherwise
+ */
+int amdgpu_plugin_drm_restore_file(int fd, CriuRenderNode *rd)
+{
+	amdgpu_device_handle h_dev;
+	bool dev_initialized = false;
+	bool dmabufs_handed_over = false;
+	bool retry_needed = false;
+	uint32_t major, minor;
+	int *dmabufs = NULL;
+	int device_fd;
+	int ret = 0;
+
+	if (rd->num_of_bos > 0) {
+		dmabufs = xmalloc(sizeof(int) * rd->num_of_bos);
+		if (!dmabufs)
+			return -ENOMEM;
+		for (int i = 0; i < rd->num_of_bos; i++)
+			dmabufs[i] = KFD_INVALID_FD;
+	}
+
+	ret = amdgpu_device_initialize(fd, &major, &minor, &h_dev);
+	if (ret) {
+		pr_info("Error in init amdgpu device\n");
+		goto exit;
+	}
+	dev_initialized = true;
+
+	device_fd = amdgpu_device_get_fd(h_dev);
+
+	for (int i = 0; i < rd->num_of_bos; i++) {
+		DrmBoEntry *boinfo = rd->bo_entries[i];
+		struct drm_gem_change_handle change_args = { 0 };
+		union drm_amdgpu_gem_mmap mmap_args = { 0 };
+		struct drm_amdgpu_gem_va va_args = { 0 };
+		int dmabuf_fd = -1;
+		uint32_t handle;
+
+		if (work_already_completed(boinfo->handle, rd->drm_render_minor))
+			continue;
+
+		if (boinfo->is_import) {
+			if (boinfo->handle != -1) {
+				int fd_id = amdgpu_id_for_handle(boinfo->handle);
+
+				if (fd_id == -1) {
+					/* Exporter not restored yet; try again later. */
+					retry_needed = true;
+					continue;
+				}
+				dmabuf_fd = fdstore_get(fd_id);
+			}
+
+			if (dmabuf_fd < 0) {
+				pr_err("No dmabuf fd available for imported BO %d\n", i);
+				ret = -1;
+				goto exit;
+			}
+			/* Track it right away so the error path closes it. */
+			dmabufs[i] = dmabuf_fd;
+
+			if (drmPrimeFDToHandle(device_fd, dmabuf_fd, &handle)) {
+				pr_perror("Failed to import the dmabuf of BO %d", i);
+				ret = -1;
+				goto exit;
+			}
+		} else {
+			union drm_amdgpu_gem_create create_args = { 0 };
+
+			create_args.in.bo_size = boinfo->size;
+			create_args.in.alignment = boinfo->alignment;
+			create_args.in.domains = boinfo->preferred_domains;
+			create_args.in.domain_flags = boinfo->alloc_flags;
+
+			if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &create_args) == -1) {
+				pr_perror("Error Failed to call create ioctl");
+				ret = -1;
+				goto exit;
+			}
+			handle = create_args.out.handle;
+
+			/*
+			 * An export failure is tolerated here: the fd is only
+			 * needed for BOs whose contents get refilled, and
+			 * that step fails on its own for an invalid fd.
+			 */
+			if (drmPrimeHandleToFD(device_fd, handle, 0, &dmabuf_fd)) {
+				pr_info("Could not export BO %d as a dmabuf: %s\n", i, strerror(errno));
+				dmabuf_fd = -1;
+			} else {
+				dmabufs[i] = dmabuf_fd;
+			}
+		}
+
+		change_args.handle = handle;
+		change_args.new_handle = boinfo->handle;
+
+		if (drmIoctl(fd, DRM_IOCTL_GEM_CHANGE_HANDLE, &change_args) == -1) {
+			pr_perror("Error Failed to call change ioctl; check if the kernel has DRM_IOCTL_GEM_CHANGE_HANDLE support");
+			ret = -1;
+			goto exit;
+		}
+
+		/* NOTE(review): the result of store_dmabuf_fd() is not checked - confirm that is intended. */
+		if (!boinfo->is_import)
+			store_dmabuf_fd(boinfo->handle, dmabuf_fd);
+
+		ret = record_completed_work(boinfo->handle, rd->drm_render_minor);
+		if (ret)
+			goto exit;
+
+		mmap_args.in.handle = boinfo->handle;
+
+		if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_MMAP, &mmap_args) == -1) {
+			pr_perror("Error Failed to call mmap ioctl");
+			ret = -1;
+			goto exit;
+		}
+
+		for (int j = 0; j < boinfo->num_of_vms; j++) {
+			DrmVmEntry *vminfo = boinfo->vm_entries[j];
+
+			va_args.handle = boinfo->handle;
+			va_args.operation = AMDGPU_VA_OP_MAP;
+			va_args.flags = vminfo->flags;
+			va_args.va_address = vminfo->addr;
+			va_args.offset_in_bo = vminfo->offset;
+			va_args.map_size = vminfo->size;
+
+			if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_VA, &va_args) == -1) {
+				pr_perror("Error Failed to call gem va ioctl");
+				ret = -1;
+				goto exit;
+			}
+		}
+
+		ret = save_vma_updates(boinfo->offset, boinfo->addr, mmap_args.out.addr_ptr, fd);
+		if (ret < 0)
+			goto exit;
+	}
+
+	/*
+	 * Kept from the original flow: a non-zero status left over from the
+	 * loop skips the remaining steps.
+	 * NOTE(review): confirm what a positive save_vma_updates() result means.
+	 */
+	if (ret)
+		goto exit;
+
+	ret = record_completed_work(-1, rd->drm_render_minor);
+	if (ret)
+		goto exit;
+
+	dev_initialized = false;
+	ret = amdgpu_device_deinitialize(h_dev);
+	if (ret) {
+		pr_info("Error in deinit amdgpu device\n");
+		goto exit;
+	}
+
+	if (rd->num_of_bos > 0) {
+		/* The callee closes every valid fd in dmabufs on all paths. */
+		dmabufs_handed_over = true;
+		ret = restore_bo_contents_drm(rd->drm_render_minor, rd, fd, dmabufs);
+	}
+
+exit:
+	if (dev_initialized)
+		amdgpu_device_deinitialize(h_dev);
+
+	if (dmabufs && !dmabufs_handed_over) {
+		for (int i = 0; i < rd->num_of_bos; i++) {
+			if (dmabufs[i] != KFD_INVALID_FD)
+				close(dmabufs[i]);
+		}
+	}
+	xfree(dmabufs);
+
+	if (ret < 0)
+		return ret;
+
+	return retry_needed;
+}
diff --git a/plugins/amdgpu/amdgpu_plugin_drm.h b/plugins/amdgpu/amdgpu_plugin_drm.h
new file mode 100644
index 000000000..c766def56
--- /dev/null
+++ b/plugins/amdgpu/amdgpu_plugin_drm.h
@@ -0,0 +1,40 @@
+#ifndef __AMDGPU_PLUGIN_DRM_H__
+#define __AMDGPU_PLUGIN_DRM_H__
+
+#include <dirent.h>
+#include "common/list.h"
+
+#include "xmalloc.h"
+#include "criu-log.h"
+#include "kfd_ioctl.h"
+#include "amdgpu_plugin_util.h"
+#include "amdgpu_plugin_topology.h"
+
+
+/**
+ * Determines if VMA's of input file descriptor belong to amdgpu's
+ * DRM device and are therefore supported
+ */
+int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *drm);
+
+/**
+ * Serialize meta-data about a particular DRM device, its number of BOs,
+ * etc into a file. The serialized filename has in it the value ID that
+ * is passed in as a parameter
+ */
+int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm);
+
+int amdgpu_plugin_drm_restore_file(int fd, CriuRenderNode *rd);
+
+int amdgpu_plugin_drm_unpause_file(int fd);
+
+int amdgpu_id_for_handle(int handle);
+
+int store_dmabuf_fd(int handle, int fd);
+
+int get_gem_handle(amdgpu_device_handle h_dev, int dmabuf_fd);
+
+int save_vma_updates(uint64_t offset, uint64_t addr, uint64_t restored_offset, int gpu_id);
+
+#endif		/* __AMDGPU_PLUGIN_DRM_H__ */
+
diff --git a/plugins/amdgpu/amdgpu_plugin_topology.c b/plugins/amdgpu/amdgpu_plugin_topology.c
index 42689933e..730f2e028 100644
--- a/plugins/amdgpu/amdgpu_plugin_topology.c
+++ b/plugins/amdgpu/amdgpu_plugin_topology.c
@@ -16,34 +16,11 @@
 
 #include "xmalloc.h"
 #include "kfd_ioctl.h"
+#include "amdgpu_plugin_util.h"
 #include "amdgpu_plugin_topology.h"
 
 #define TOPOLOGY_PATH "/sys/class/kfd/kfd/topology/nodes/"
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE 1
-#endif
-
-#ifdef COMPILE_TESTS
-#undef pr_err
-#define pr_err(format, arg...) fprintf(stdout, "%s:%d ERROR:" format, __FILE__, __LINE__, ##arg)
-#undef pr_info
-#define pr_info(format, arg...) fprintf(stdout, "%s:%d INFO:" format, __FILE__, __LINE__, ##arg)
-#undef pr_debug
-#define pr_debug(format, arg...) fprintf(stdout, "%s:%d DBG:" format, __FILE__, __LINE__, ##arg)
-
-#undef pr_perror
-#define pr_perror(format, arg...) \
-	fprintf(stdout, "%s:%d: " format " (errno = %d (%s))\n", __FILE__, __LINE__, ##arg, errno, strerror(errno))
-#endif
-
-#ifdef DEBUG
-#define plugin_log_msg(fmt, ...) pr_debug(fmt, ##__VA_ARGS__)
-#else
-#define plugin_log_msg(fmt, ...) \
-	{                        \
-	}
-#endif
+#define MAX_PARAMETER_LEN 64
 
 /* User override options */
 /* Skip firmware version check */
@@ -68,7 +45,7 @@ bool kfd_capability_check = true;
  */
 int fd_next = -1;
 
-static int open_drm_render_device(int minor)
+int open_drm_render_device(int minor)
 {
 	char path[128];
 	int fd, ret_fd;
@@ -441,7 +418,9 @@ struct tp_node *sys_add_node(struct tp_system *sys, uint32_t id, uint32_t gpu_id
 
 static bool get_prop(char *line, char *name, uint64_t *value)
 {
-	if (sscanf(line, " %29s %lu", name, value) != 2)
+	char format[16];
+	sprintf(format, " %%%ds %%lu", MAX_PARAMETER_LEN);
+	if (sscanf(line, format, name, value) != 2)
 		return false;
 	return true;
 }
@@ -461,7 +440,7 @@ static int parse_topo_node_properties(struct tp_node *dev, const char *dir_path)
 	}
 
 	while (fgets(line, sizeof(line), file)) {
-		char name[30];
+		char name[MAX_PARAMETER_LEN + 1];
 		uint64_t value;
 
 		memset(name, 0, sizeof(name));
@@ -589,7 +568,7 @@ static int parse_topo_node_mem_banks(struct tp_node *node, const char *dir_path)
 			}
 
 			while (fgets(line, sizeof(line), file)) {
-				char name[30];
+				char name[MAX_PARAMETER_LEN + 1];
 				uint64_t value;
 
 				memset(name, 0, sizeof(name));
@@ -678,7 +657,7 @@ static int parse_topo_node_iolinks(struct tp_node *node, const char *dir_path)
 			}
 
 			while (fgets(line, sizeof(line), file)) {
-				char name[30];
+				char name[MAX_PARAMETER_LEN + 1];
 				uint64_t value;
 
 				memset(name, 0, sizeof(name));
@@ -840,6 +819,9 @@ void topology_free(struct tp_system *sys)
 		list_del(&p2pgroup->listm_system);
 		xfree(p2pgroup);
 	}
+
+	/* Update Topology as being freed */
+	sys->parsed = false;
 }
 
 /**
@@ -1063,7 +1045,7 @@ static bool iolink_match(struct tp_iolink *src, struct tp_iolink *dest)
  *
  * Nodes compatibility are determined by:
  * 1. Comparing the node properties
- * 2. Making sure iolink mappings to CPUs would be compabitle with existing iolink mappings in maps
+ * 2. Making sure iolink mappings to CPUs would be compatible with existing iolink mappings in maps
  *
  * If src_node and dest_node are mappable, then map_device will push the new mapping
  * for src_node -> dest_node into new_maps.
@@ -1241,7 +1223,7 @@ static bool map_devices(struct tp_system *src_sys, struct tp_system *dest_sys, s
 				return true;
 			} else {
 				/* We could not map remaining nodes in the list. Add dest node back
-				 * to list and try to map next dest ndoe in list to current src
+				 * to list and try to map next dest node in list to current src
 				 * node.
 				 */
 				pr_debug("Nodes after [0x%04X -> 0x%04X] did not match, "
@@ -1461,3 +1443,15 @@ int set_restore_gpu_maps(struct tp_system *src_sys, struct tp_system *dest_sys,
 
 	return ret;
 }
+
+/* Count the nodes on sys->nodes for which NODE_IS_GPU() holds. */
+int topology_gpu_count(struct tp_system *sys)
+{
+	struct tp_node *cur;
+	int gpus = 0;
+
+	list_for_each_entry(cur, &sys->nodes, listm_system) {
+		if (!NODE_IS_GPU(cur))
+			continue;
+		gpus++;
+	}
+
+	return gpus;
+}
+
diff --git a/plugins/amdgpu/amdgpu_plugin_topology.h b/plugins/amdgpu/amdgpu_plugin_topology.h
index 9d99cda1c..e19f8e7ce 100644
--- a/plugins/amdgpu/amdgpu_plugin_topology.h
+++ b/plugins/amdgpu/amdgpu_plugin_topology.h
@@ -107,6 +107,8 @@ int topology_parse(struct tp_system *topology, const char *msg);
 int topology_determine_iolinks(struct tp_system *sys);
 void topology_print(const struct tp_system *sys, const char *msg);
 
+int topology_gpu_count(struct tp_system *topology);
+
 struct id_map *maps_add_gpu_entry(struct device_maps *maps, const uint32_t src_id, const uint32_t dest_id);
 
 struct tp_node *sys_add_node(struct tp_system *sys, uint32_t id, uint32_t gpu_id);
@@ -116,6 +118,7 @@ struct tp_node *sys_get_node_by_gpu_id(const struct tp_system *sys, const uint32
 struct tp_node *sys_get_node_by_render_minor(const struct tp_system *sys, const int drm_render_minor);
 struct tp_node *sys_get_node_by_index(const struct tp_system *sys, uint32_t index);
 
+int open_drm_render_device(int minor);
 int node_get_drm_render_device(struct tp_node *node);
 void sys_close_drm_render_devices(struct tp_system *sys);
 
diff --git a/plugins/amdgpu/amdgpu_plugin_util.c b/plugins/amdgpu/amdgpu_plugin_util.c
new file mode 100644
index 000000000..592562474
--- /dev/null
+++ b/plugins/amdgpu/amdgpu_plugin_util.c
@@ -0,0 +1,330 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <semaphore.h>
+
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <sys/sysmacros.h>
+#include <linux/limits.h>
+
+#include <dirent.h>
+#include "common/list.h"
+
+#include <xf86drm.h>
+#include <libdrm/amdgpu.h>
+
+#include "criu-plugin.h"
+#include "plugin.h"
+#include "criu-amdgpu.pb-c.h"
+
+#include "img-streamer.h"
+#include "image.h"
+#include "cr_options.h"
+
+#include "xmalloc.h"
+#include "criu-log.h"
+#include "kfd_ioctl.h"
+#include "amdgpu_drm.h"
+#include "amdgpu_plugin_util.h"
+#include "amdgpu_plugin_topology.h"
+#include "amdgpu_plugin_drm.h"
+
+static LIST_HEAD(dumped_fds);
+static LIST_HEAD(shared_bos);
+static LIST_HEAD(completed_work);
+
+/* Helper structures to encode device topology of SRC and DEST platforms */
+struct tp_system src_topology;
+struct tp_system dest_topology;
+
+/* Helper structures to encode device maps during Checkpoint and Restore operations */
+struct device_maps checkpoint_maps;
+struct device_maps restore_maps;
+
+/*
+ * Remember a duplicate of fd on the dumped_fds list.
+ *
+ * The list owns the duplicate; clear_dumped_fds() closes it. If the list
+ * entry cannot be allocated the duplicate is closed again so it does not
+ * leak.
+ *
+ * @param fd     descriptor to duplicate and record
+ * @param is_drm stored with the entry; handle_for_shared_bo_fd() only
+ *               considers entries that have it set
+ * @return 0 on success, negative on failure
+ */
+int record_dumped_fd(int fd, bool is_drm)
+{
+	struct dumped_fd *st;
+	int newfd;
+
+	newfd = dup(fd);
+	if (newfd < 0)
+		return newfd;
+
+	st = malloc(sizeof(*st));
+	if (!st) {
+		close(newfd);
+		return -1;
+	}
+
+	st->fd = newfd;
+	st->is_drm = is_drm;
+	list_add(&st->l, &dumped_fds);
+
+	return 0;
+}
+
+/* Expose the file-local dumped_fds list head to other compilation units. */
+struct list_head *get_dumped_fds()
+{
+	return &dumped_fds;
+}
+
+/*
+ * Look up handle on the shared_bos list and report its has_exporter flag.
+ * Returns false for handle -1 and for handles that were never recorded.
+ */
+bool shared_bo_has_exporter(int handle)
+{
+	struct shared_bo *entry;
+
+	if (handle != -1) {
+		list_for_each_entry(entry, &shared_bos, l) {
+			if (entry->handle != handle)
+				continue;
+			return entry->has_exporter;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Add handle to the shared_bos list unless it is -1 or already present.
+ * A new entry has an exporter exactly when it was not imported.
+ * Returns 0 on success (including the no-op cases), -1 if allocation fails.
+ */
+int record_shared_bo(int handle, bool is_imported)
+{
+	struct shared_bo *entry;
+
+	if (handle == -1)
+		return 0;
+
+	list_for_each_entry(entry, &shared_bos, l)
+		if (entry->handle == handle)
+			return 0;
+
+	entry = malloc(sizeof(*entry));
+	if (entry == NULL)
+		return -1;
+
+	entry->has_exporter = !is_imported;
+	entry->handle = handle;
+	list_add(&entry->l, &shared_bos);
+
+	return 0;
+}
+
+/*
+ * Find the GEM handle of a recorded shared BO that the dmabuf fd refers to.
+ *
+ * Each dumped DRM fd is tried in turn: the dmabuf is converted to a GEM
+ * handle on that device and the result is looked up on the shared_bos list.
+ * The libdrm device handle opened for each attempt is released on every
+ * path, including the successful one.
+ *
+ * @param fd dmabuf file descriptor
+ * @return the matching GEM handle, or -1 if none of the dumped DRM fds
+ *         yields a recorded handle
+ */
+int handle_for_shared_bo_fd(int fd)
+{
+	struct dumped_fd *df;
+	int trial_handle;
+	amdgpu_device_handle h_dev;
+	uint32_t major, minor;
+	struct shared_bo *bo;
+
+	list_for_each_entry(df, &dumped_fds, l) {
+		bool found = false;
+
+		/* see if the gem handle for fd using the hdev for df->fd is the
+		   same as bo->handle. */
+
+		if (!df->is_drm)
+			continue;
+
+		if (amdgpu_device_initialize(df->fd, &major, &minor, &h_dev)) {
+			pr_err("Failed to initialize amdgpu device\n");
+			continue;
+		}
+
+		trial_handle = get_gem_handle(h_dev, fd);
+		if (trial_handle >= 0) {
+			list_for_each_entry(bo, &shared_bos, l) {
+				if (bo->handle == trial_handle) {
+					found = true;
+					break;
+				}
+			}
+		}
+
+		amdgpu_device_deinitialize(h_dev);
+
+		if (found)
+			return trial_handle;
+	}
+
+	return -1;
+}
+
+/*
+ * Record the (handle, id) pair on the completed_work list.
+ * Returns 0 on success, -1 if the entry cannot be allocated.
+ */
+int record_completed_work(int handle, int id)
+{
+	struct restore_completed_work *done = malloc(sizeof(*done));
+
+	if (done == NULL)
+		return -1;
+
+	done->id = id;
+	done->handle = handle;
+	list_add(&done->l, &completed_work);
+
+	return 0;
+}
+
+/* True when the exact (handle, id) pair is on the completed_work list. */
+bool work_already_completed(int handle, int id)
+{
+	struct restore_completed_work *done;
+
+	list_for_each_entry(done, &completed_work, l) {
+		if (done->handle != handle)
+			continue;
+		if (done->id != id)
+			continue;
+		return true;
+	}
+
+	return false;
+}
+
+/* Unlink and free every entry on the completed_work list. */
+void clear_restore_state()
+{
+	struct restore_completed_work *cur, *next;
+
+	list_for_each_entry_safe(cur, next, &completed_work, l) {
+		list_del(&cur->l);
+		free(cur);
+	}
+}
+
+/* Unlink every entry on the dumped_fds list, closing its fd and freeing it. */
+void clear_dumped_fds()
+{
+	struct dumped_fd *cur, *next;
+
+	list_for_each_entry_safe(cur, next, &dumped_fds, l) {
+		list_del(&cur->l);
+		close(cur->fd);
+		free(cur);
+	}
+}
+
+/*
+ * Read exactly buf_len bytes from fp into buf.
+ * Returns 0 on success, -EIO (after logging both counts) on a short read.
+ */
+int read_fp(FILE *fp, void *buf, const size_t buf_len)
+{
+	size_t len_read;
+
+	len_read = fread(buf, 1, buf_len, fp);
+	if (len_read != buf_len) {
+		/* size_t must be printed with %zu; %ld is only right where long matches it. */
+		pr_err("Unable to read file (read:%zu buf_len:%zu)\n", len_read, buf_len);
+		return -EIO;
+	}
+	return 0;
+}
+
+/*
+ * Write exactly buf_len bytes from buf to fp.
+ * Returns 0 on success, -EIO (after logging both counts) on a short write.
+ */
+int write_fp(FILE *fp, const void *buf, const size_t buf_len)
+{
+	size_t len_write;
+
+	len_write = fwrite(buf, 1, buf_len, fp);
+	if (len_write != buf_len) {
+		/* size_t must be printed with %zu; %ld is only right where long matches it. */
+		pr_err("Unable to write file (wrote:%zu buf_len:%zu)\n", len_write, buf_len);
+		return -EIO;
+	}
+	return 0;
+}
+
+/**
+ * @brief Open an image file
+ *
+ * We store the size of the actual contents in the first 8-bytes of
+ * the file. This allows us to determine the file size when using
+ * criu_image_streamer when fseek and fstat are not available. The
+ * FILE * returned is already at the location of the first actual
+ * contents.
+ *
+ * In write mode *size is written out as that header; in read mode the
+ * header is read into *size. The descriptor is closed on every failure
+ * path, including when no stream can be attached to it.
+ *
+ * @param path The file path
+ * @param write False for read, true for write
+ * @param size Size of actual contents
+ * @return FILE *if successful, NULL if failed
+ */
+FILE *open_img_file(char *path, bool write, size_t *size)
+{
+	FILE *fp = NULL;
+	int fd, ret;
+
+	if (opts.stream)
+		fd = img_streamer_open(path, write ? O_DUMP : O_RSTR);
+	else
+		fd = openat(criu_get_image_dir(), path, write ? (O_WRONLY | O_CREAT) : O_RDONLY, 0600);
+
+	if (fd < 0) {
+		pr_err("%s: Failed to open for %s\n", path, write ? "write" : "read");
+		return NULL;
+	}
+
+	fp = fdopen(fd, write ? "w" : "r");
+	if (!fp) {
+		pr_err("%s: Failed get pointer for %s\n", path, write ? "write" : "read");
+		/* No stream took ownership of fd, so close it here. */
+		close(fd);
+		return NULL;
+	}
+
+	if (write)
+		ret = write_fp(fp, size, sizeof(*size));
+	else
+		ret = read_fp(fp, size, sizeof(*size));
+
+	if (ret) {
+		pr_err("%s:Failed to access file size\n", path);
+		fclose(fp);
+		return NULL;
+	}
+
+	pr_debug("%s:Opened file for %s with size:%zu\n", path, write ? "write" : "read", *size);
+	return fp;
+}
+
+/*
+ * Read exactly buf_len bytes from the start of file_path into buf.
+ * Returns 0 on success, -errno if the file cannot be opened, or the
+ * read_fp() error on a short read.
+ */
+int read_file(const char *file_path, void *buf, const size_t buf_len)
+{
+	FILE *in = fopen(file_path, "r");
+	int status;
+
+	if (in == NULL) {
+		pr_err("Cannot fopen %s\n", file_path);
+		return -errno;
+	}
+
+	status = read_fp(in, buf, buf_len);
+	fclose(in); /* releases the underlying fd as well */
+	return status;
+}
+
+
+/**
+ * @brief Write an image file
+ *
+ * We store the size of the actual contents in the first 8-bytes of the file. This allows us to
+ * determine the file size when using criu_image_streamer when fseek and fstat are not available.
+ *
+ * @param path The file path
+ * @param buf pointer to data to be written
+ * @param buf_len size of buf
+ * @return 0 if successful. -errno on failure, or -EIO when the failure left errno at 0
+ */
+int write_img_file(char *path, const void *buf, const size_t buf_len)
+{
+	int ret;
+	FILE *fp;
+	size_t len = buf_len;
+
+	fp = open_img_file(path, true, &len);
+	if (!fp)
+		/* Never report success for a failed open, even if errno is 0. */
+		return errno ? -errno : -EIO;
+
+	ret = write_fp(fp, buf, buf_len);
+	fclose(fp); /* this will also close fd */
+	return ret;
+}
+
+/* Log every field of the first bo_cnt buckets of bo_list at info level. */
+void print_kfd_bo_stat(int bo_cnt, struct kfd_criu_bo_bucket *bo_list)
+{
+	int idx = 0;
+
+	pr_info("\n");
+	while (idx < bo_cnt) {
+		struct kfd_criu_bo_bucket *bo = bo_list + idx;
+
+		pr_info("\n");
+		pr_info("%s(), %d. KFD BO Addr: %" PRIx64 " \n", __func__, idx, bo->addr);
+		pr_info("%s(), %d. KFD BO Size: %" PRIx64 " \n", __func__, idx, bo->size);
+		pr_info("%s(), %d. KFD BO Offset: %" PRIx64 " \n", __func__, idx, bo->offset);
+		pr_info("%s(), %d. KFD BO Restored Offset: %" PRIx64 " \n", __func__, idx, bo->restored_offset);
+		pr_info("%s(), %d. KFD BO Alloc Flags: %x \n", __func__, idx, bo->alloc_flags);
+		pr_info("%s(), %d. KFD BO Gpu ID: %x \n", __func__, idx, bo->gpu_id);
+		pr_info("%s(), %d. KFD BO Dmabuf FD: %x \n", __func__, idx, bo->dmabuf_fd);
+		pr_info("\n");
+
+		idx++;
+	}
+	pr_info("\n");
+}
diff --git a/plugins/amdgpu/amdgpu_plugin_util.h b/plugins/amdgpu/amdgpu_plugin_util.h
new file mode 100644
index 000000000..f5f752d0b
--- /dev/null
+++ b/plugins/amdgpu/amdgpu_plugin_util.h
@@ -0,0 +1,145 @@
+#ifndef __AMDGPU_PLUGIN_UTIL_H__
+#define __AMDGPU_PLUGIN_UTIL_H__
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+
+#include <libdrm/amdgpu.h>
+
+#ifdef COMPILE_TESTS
+#undef pr_err
+#define pr_err(format, arg...) fprintf(stdout, "%s:%d ERROR:" format, __FILE__, __LINE__, ##arg)
+#undef pr_info
+#define pr_info(format, arg...) fprintf(stdout, "%s:%d INFO:" format, __FILE__, __LINE__, ##arg)
+#undef pr_debug
+#define pr_debug(format, arg...) fprintf(stdout, "%s:%d DBG:" format, __FILE__, __LINE__, ##arg)
+
+#undef pr_perror
+#define pr_perror(format, arg...) \
+	fprintf(stdout, "%s:%d: " format " (errno = %d (%s))\n", __FILE__, __LINE__, ##arg, errno, strerror(errno))
+#endif
+
+#ifdef LOG_PREFIX
+#undef LOG_PREFIX
+#endif
+#define LOG_PREFIX "amdgpu_plugin: "
+
+#ifdef DEBUG
+#define plugin_log_msg(fmt, ...) pr_debug(fmt, ##__VA_ARGS__)
+#else
+#define plugin_log_msg(fmt, ...) \
+	{                        \
+	}
+#endif
+
+
+/* Path where KFD device is surfaced */
+#define AMDGPU_KFD_DEVICE		"/dev/kfd"
+
+/* Path where DRM devices are surfaced */
+#define AMDGPU_DRM_DEVICE		"/dev/dri/renderD%d"
+
+/* Minimum version of KFD IOCTL's that supports C&R */
+#define KFD_IOCTL_MAJOR_VERSION			1
+#define MIN_KFD_IOCTL_MINOR_VERSION		8
+
+/* Name of file having serialized data of KFD device */
+#define IMG_KFD_FILE			"amdgpu-kfd-%d.img"
+
+/* Name of file having serialized data of KFD buffer objects (BOs) */
+#define IMG_KFD_PAGES_FILE		"amdgpu-pages-%d-%04x.img"
+
+/* Name of file having serialized data of DRM device */
+#define IMG_DRM_FILE			"amdgpu-renderD-%d.img"
+
+/* Name of file having serialized data of dmabuf meta */
+#define IMG_DMABUF_FILE "amdgpu-dmabuf_%d.img"
+
+/* Name of file having serialized data of DRM device buffer objects (BOs) */
+#define IMG_DRM_PAGES_FILE "amdgpu-drm-pages-%d-%d-%04x.img"
+
+/* Helper macros to Checkpoint and Restore a ROCm file */
+#define HSAKMT_SHM_PATH			"/dev/shm/hsakmt_shared_mem"
+#define HSAKMT_SHM				"/hsakmt_shared_mem"
+#define HSAKMT_SEM_PATH			"/dev/shm/sem.hsakmt_semaphore"
+#define HSAKMT_SEM				"hsakmt_semaphore"
+#define DMABUF_LINK				"/dmabuf"
+
+/* Helper macros to build SDMA command packets */
+#define SDMA_PACKET(op, sub_op, e) ((((e)&0xFFFF) << 16) | (((sub_op)&0xFF) << 8) | (((op)&0xFF) << 0))
+
+#define SDMA_OPCODE_COPY	    1
+#define SDMA_COPY_SUB_OPCODE_LINEAR 0
+#define SDMA_NOP		    0
+#define SDMA_LINEAR_COPY_MAX_SIZE   (1ULL << 21)
+
+enum sdma_op_type {
+	SDMA_OP_VRAM_READ,
+	SDMA_OP_VRAM_WRITE,
+};
+
+struct dumped_fd {
+	struct list_head l; /* list linkage */
+	int fd;
+	bool is_drm; /* presumably set for DRM (as opposed to KFD) fds — confirm in record_dumped_fd() */
+};
+
+struct shared_bo {
+	struct list_head l; /* list linkage */
+	int handle;
+	bool has_exporter; /* NOTE(review): looks like the value shared_bo_has_exporter() reports — confirm */
+};
+
+struct restore_completed_work {
+	struct list_head l; /* list linkage */
+	int handle;
+	int id;
+};
+
+/* Helper structures to encode device topology of SRC and DEST platforms */
+extern struct tp_system src_topology;
+extern struct tp_system dest_topology;
+
+/* Helper structures to encode device maps during Checkpoint and Restore operations */
+extern struct device_maps checkpoint_maps;
+extern struct device_maps restore_maps;
+
+extern int fd_next;
+
+extern bool kfd_fw_version_check;
+extern bool kfd_sdma_fw_version_check;
+extern bool kfd_caches_count_check;
+extern bool kfd_num_gws_check;
+extern bool kfd_vram_size_check;
+extern bool kfd_numa_check;
+extern bool kfd_capability_check;
+
+int read_fp(FILE *fp, void *buf, const size_t buf_len);
+int write_fp(FILE *fp, const void *buf, const size_t buf_len);
+int read_file(const char *file_path, void *buf, const size_t buf_len);
+int write_img_file(char *path, const void *buf, const size_t buf_len);
+FILE *open_img_file(char *path, bool write, size_t *size);
+
+int record_dumped_fd(int fd, bool is_drm);
+struct list_head *get_dumped_fds(void);
+void clear_dumped_fds(void);
+
+bool shared_bo_has_exporter(int handle);
+int record_shared_bo(int handle, bool is_imported);
+int handle_for_shared_bo_fd(int dmabuf_fd);
+
+int record_completed_work(int handle, int id);
+bool work_already_completed(int handle, int id);
+
+void clear_restore_state(void);
+
+void print_kfd_bo_stat(int bo_cnt, struct kfd_criu_bo_bucket *bo_list);
+
+int sdma_copy_bo(int shared_fd, uint64_t size, FILE *storage_fp,
+		 void *buffer, size_t buffer_size, amdgpu_device_handle h_dev,
+		 uint64_t max_copy_size, enum sdma_op_type type, bool do_not_free);
+
+int serve_out_dmabuf_fd(int handle, int fd);
+
+#endif		/* __AMDGPU_PLUGIN_UTIL_H__ */
diff --git a/plugins/amdgpu/amdgpu_socket_utils.c b/plugins/amdgpu/amdgpu_socket_utils.c
new file mode 100644
index 000000000..c8bf6d1ba
--- /dev/null
+++ b/plugins/amdgpu/amdgpu_socket_utils.c
@@ -0,0 +1,320 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+
+#include "amdgpu_socket_utils.h"
+#include "criu-log.h"
+#include "common/scm.h"
+#include "fdstore.h"
+#include "util-pie.h"
+#include "util.h"
+
+int parallel_socket_addr_len;
+struct sockaddr_un parallel_socket_addr;
+int parallel_socket_id = 0;
+
+static void amdgpu_socket_name_gen(struct sockaddr_un *addr, int *len)
+{
+	snprintf(addr->sun_path, UNIX_PATH_MAX, "x/criu-amdgpu-parallel-%s", criu_run_id);
+	addr->sun_family = AF_UNIX;
+	*len = SUN_LEN(addr);
+	addr->sun_path[0] = '\0'; /* the 'x' only let SUN_LEN() measure the name; a leading NUL makes it abstract */
+}
+
+int install_parallel_sock(void)
+{
+	/*
+	 * Create, bind and listen on the parallel restore socket, register it with fdstore_add()
+	 * and keep the returned id in parallel_socket_id. Returns 0 on success, -1 on failure.
+	 */
+	struct sockaddr *sa = (struct sockaddr *)&parallel_socket_addr;
+	int fd, rc = -1;
+
+	fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+	if (fd < 0) {
+		pr_perror("socket creation failed");
+		return -1;
+	}
+
+	amdgpu_socket_name_gen(&parallel_socket_addr, &parallel_socket_addr_len);
+	if (bind(fd, sa, parallel_socket_addr_len) < 0) {
+		pr_perror("bind failed");
+		goto out;
+	}
+	if (listen(fd, SOMAXCONN) < 0) {
+		pr_perror("listen failed");
+		goto out;
+	}
+
+	parallel_socket_id = fdstore_add(fd);
+	if (parallel_socket_id >= 0)
+		rc = 0;
+out:
+	/* The local descriptor is closed on every path, success included. */
+	close(fd);
+	return rc;
+}
+
+void parallel_restore_bo_add(int dmabuf_fd, int gpu_id, uint64_t size, uint64_t offset,
+			     parallel_restore_cmd *restore_cmd)
+{
+	/* Append one BO: its dmabuf fd goes into fds_write[] and a new entry refers to that slot. */
+	parallel_restore_cmd_head *head = &restore_cmd->cmd_head;
+	int slot = head->fd_write_num++;
+	parallel_restore_entry *entry = &restore_cmd->entries[head->entry_num++];
+
+	restore_cmd->fds_write[slot] = dmabuf_fd;
+	entry->gpu_id = gpu_id;
+	entry->write_id = slot;
+	entry->write_offset = 0;
+	entry->read_offset = offset;
+	entry->size = size;
+}
+
+void parallel_restore_gpu_id_add(int gpu_id, int minor, parallel_restore_cmd *restore_cmd)
+{
+	/* Append (gpu_id, minor) at index gpu_num, then bump the count; capacity is not checked here. */
+	restore_cmd->gpu_ids[restore_cmd->cmd_head.gpu_num++] = (parallel_gpu_info){ .gpu_id = gpu_id, .minor = minor };
+}
+
+static int send_metadata(int sock_fd, parallel_restore_cmd *restore_cmd)
+{
+	ssize_t len = sizeof(parallel_restore_cmd_head); /* a short send would desync the stream: treat it as failure */
+	if (send(sock_fd, &restore_cmd->cmd_head, len, 0) == len)
+		return 0;
+	pr_perror("Send parallel restore command head fail");
+	return -1;
+}
+
+static int send_gpu_ids(int sock_fd, parallel_restore_cmd *restore_cmd)
+{
+	ssize_t len = restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info); /* whole array or failure */
+	if (send(sock_fd, restore_cmd->gpu_ids, len, 0) == len)
+		return 0;
+	pr_perror("Send GPU ids of parallel restore command fail");
+	return -1;
+}
+
+static int send_cmds(int sock_fd, parallel_restore_cmd *restore_cmd)
+{
+	ssize_t len = restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry); /* whole array or failure */
+	if (send(sock_fd, restore_cmd->entries, len, 0) == len)
+		return 0;
+	pr_perror("Send parallel restore command fail");
+	return -1;
+}
+
+static int send_dmabuf_fds(int sock_fd, parallel_restore_cmd *restore_cmd)
+{
+	int nr = restore_cmd->cmd_head.fd_write_num; /* fds queued by parallel_restore_bo_add() */
+	if (send_fds(sock_fd, NULL, 0, restore_cmd->fds_write, nr, 0, 0) >= 0)
+		return 0;
+	pr_perror("Send dmabuf fds fail");
+	return -1;
+}
+
+int send_parallel_restore_cmd(parallel_restore_cmd *restore_cmd)
+{
+	/*
+	 * Open a fresh connection to the parallel restore socket and
+	 * transmit restore_cmd over it. The connection is closed again
+	 * before returning, whatever the outcome.
+	 * Returns 0 on success and a non-zero value on failure.
+	 */
+	int fd, rc;
+
+	fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+	if (fd < 0) {
+		pr_perror("Socket creation failed");
+		return -1;
+	}
+
+	rc = connect(fd, (struct sockaddr *)&parallel_socket_addr, parallel_socket_addr_len);
+	if (rc < 0) {
+		pr_perror("Connect failed");
+		goto out;
+	}
+
+	/*
+	 * recv_parallel_restore_cmd() reads the pieces in exactly this order:
+	 * header, GPU ids, entries, dmabuf fds. Stop at the first failure.
+	 */
+	rc = send_metadata(fd, restore_cmd);
+	if (!rc)
+		rc = send_gpu_ids(fd, restore_cmd);
+	if (!rc)
+		rc = send_cmds(fd, restore_cmd);
+	if (!rc)
+		rc = send_dmabuf_fds(fd, restore_cmd);
+
+out:
+	close(fd);
+	return rc;
+}
+
+int init_parallel_restore_cmd(int num, int id, int gpu_num, parallel_restore_cmd *restore_cmd)
+{
+	/* Reset the header (all counts 0) and allocate room for num BOs and gpu_num GPUs; 0 or -ENOMEM. */
+	restore_cmd->cmd_head = (parallel_restore_cmd_head){ .id = id };
+
+	restore_cmd->gpu_ids = xzalloc(gpu_num * sizeof(parallel_gpu_info));
+	restore_cmd->fds_write = xzalloc(num * sizeof(int));
+	restore_cmd->entries = xzalloc(num * sizeof(parallel_restore_entry));
+	if (restore_cmd->gpu_ids && restore_cmd->fds_write && restore_cmd->entries)
+		return 0;
+
+	/* Partial failure: release what was allocated and leave no dangling or uninitialized pointers. */
+	free_parallel_restore_cmd(restore_cmd);
+	restore_cmd->gpu_ids = NULL;
+	restore_cmd->fds_write = NULL;
+	restore_cmd->entries = NULL;
+	return -ENOMEM;
+}
+
+void free_parallel_restore_cmd(parallel_restore_cmd *restore_cmd)
+{
+	void *bufs[] = { restore_cmd->gpu_ids, restore_cmd->fds_write, restore_cmd->entries };
+
+	/* Release whichever of the three arrays is set; the pointers themselves are left untouched. */
+	for (size_t i = 0; i < sizeof(bufs) / sizeof(bufs[0]); i++)
+		if (bufs[i])
+			xfree(bufs[i]);
+}
+
+static int init_parallel_restore_cmd_by_head(parallel_restore_cmd *restore_cmd)
+{
+	restore_cmd->gpu_ids = xzalloc(restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info));
+	restore_cmd->fds_write = xzalloc(restore_cmd->cmd_head.fd_write_num * sizeof(int));
+	restore_cmd->entries = xzalloc(restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry));
+	if (restore_cmd->gpu_ids && restore_cmd->fds_write && restore_cmd->entries)
+		return 0; /* all three arrays are sized from the counts in the header */
+	free_parallel_restore_cmd(restore_cmd); /* partial failure: release what was allocated ... */
+	restore_cmd->gpu_ids = NULL; /* ... and leave no dangling pointers behind */
+	restore_cmd->fds_write = NULL;
+	restore_cmd->entries = NULL;
+	return -ENOMEM;
+}
+
+static int check_quit_cmd(parallel_restore_cmd *restore_cmd)
+{
+	return !restore_cmd->cmd_head.fd_write_num; /* a header announcing zero fds is the quit request */
+}
+
+static int recv_metadata(int client_fd, parallel_restore_cmd *restore_cmd)
+{
+	ssize_t len = sizeof(parallel_restore_cmd_head); /* partial or empty (EOF) reads must not count as success */
+	if (recv(client_fd, &restore_cmd->cmd_head, len, MSG_WAITALL) == len)
+		return 0;
+	pr_perror("Recv parallel restore command head fail");
+	return -1;
+}
+
+static int recv_cmds(int client_fd, parallel_restore_cmd *restore_cmd)
+{
+	ssize_t len = restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry); /* whole array or failure */
+	if (recv(client_fd, restore_cmd->entries, len, MSG_WAITALL) == len)
+		return 0;
+	pr_perror("Recv parallel restore command fail");
+	return -1;
+}
+
+static int recv_gpu_ids(int sock_fd, parallel_restore_cmd *restore_cmd)
+{
+	ssize_t len = restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info); /* whole array or failure */
+	if (recv(sock_fd, restore_cmd->gpu_ids, len, MSG_WAITALL) == len)
+		return 0;
+	pr_perror("Recv GPU ids of parallel restore command fail");
+	return -1;
+}
+
+static int recv_dmabuf_fds(int client_fd, parallel_restore_cmd *restore_cmd)
+{
+	int nr = restore_cmd->cmd_head.fd_write_num; /* count announced by the received header */
+	if (recv_fds(client_fd, restore_cmd->fds_write, nr, 0, 0) >= 0)
+		return 0;
+	pr_perror("Recv dmabuf fds fail");
+	return -1;
+}
+
+int recv_parallel_restore_cmd(parallel_restore_cmd *restore_cmd)
+{
+	/*
+	 * Accept one connection on the parallel restore socket and read a
+	 * command from it into restore_cmd, in the order the sender emits
+	 * it: header, GPU ids, entries, dmabuf fds.
+	 *
+	 * Returns 0 when a command was received, 1 when the header is the
+	 * quit request (nothing is allocated in that case) and a negative
+	 * value on failure.
+	 */
+	int sock_fd, client_fd, ret;
+
+	sock_fd = fdstore_get(parallel_socket_id);
+	if (sock_fd < 0)
+		return -1;
+
+	client_fd = accept(sock_fd, NULL, NULL);
+	if (client_fd < 0) {
+		pr_perror("Accept failed");
+		ret = client_fd;
+		goto err_accept;
+	}
+
+	ret = recv_metadata(client_fd, restore_cmd);
+	if (ret)
+		goto err;
+
+	/* Return 1 to quit */
+	if (check_quit_cmd(restore_cmd)) {
+		ret = 1;
+		goto err;
+	}
+
+	/* The header carries the element counts needed to size the arrays. */
+	ret = init_parallel_restore_cmd_by_head(restore_cmd);
+	if (!ret)
+		ret = recv_gpu_ids(client_fd, restore_cmd);
+	if (!ret)
+		ret = recv_cmds(client_fd, restore_cmd);
+	if (!ret)
+		ret = recv_dmabuf_fds(client_fd, restore_cmd);
+
+err:
+	close(client_fd);
+err_accept:
+	close(sock_fd);
+	return ret;
+}
+
+int close_parallel_restore_server(void)
+{
+	/* Ask the receiving side to quit by sending an all-zero header (fd_write_num == 0). */
+	int sock_fd, ret;
+	parallel_restore_cmd_head cmd_head;
+
+	sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+	if (sock_fd < 0) {
+		pr_perror("Socket creation failed");
+		return -1;
+	}
+
+	ret = connect(sock_fd, (struct sockaddr *)&parallel_socket_addr, parallel_socket_addr_len);
+	if (ret < 0) {
+		pr_perror("Connect failed");
+		goto err;
+	}
+
+	memset(&cmd_head, 0, sizeof(parallel_restore_cmd_head));
+	if (send(sock_fd, &cmd_head, sizeof(cmd_head), 0) != (ssize_t)sizeof(cmd_head)) {
+		pr_perror("Send parallel restore command head fail");
+		ret = -1; /* fall through to err: so the socket is closed on this path too */
+	}
+
+err:
+	close(sock_fd);
+	return ret;
+}
\ No newline at end of file
diff --git a/plugins/amdgpu/amdgpu_socket_utils.h b/plugins/amdgpu/amdgpu_socket_utils.h
new file mode 100644
index 000000000..d7200c6bd
--- /dev/null
+++ b/plugins/amdgpu/amdgpu_socket_utils.h
@@ -0,0 +1,56 @@
+#ifndef __KFD_PLUGIN_AMDGPU_SOCKET_UTILS_H__
+#define __KFD_PLUGIN_AMDGPU_SOCKET_UTILS_H__
+
+#include <stdint.h> /* uint64_t, used by parallel_restore_entry */
+
+typedef struct {
+	int id;
+	int fd_write_num; /* The number of buffer objects to be restored. */
+	int entry_num;	  /* The number of restore commands.*/
+	int gpu_num;	  /* The number of parallel_gpu_info records sent after the header. */
+} parallel_restore_cmd_head;
+
+typedef struct {
+	int gpu_id;
+	int minor; /* presumably the DRM render minor of the GPU — confirm against callers */
+} parallel_gpu_info;
+
+typedef struct {
+	int gpu_id;
+	int write_id;	       /* index into fds_write[] of the dmabuf fd this entry refers to */
+	uint64_t read_offset;  /* the offset argument given to parallel_restore_bo_add() */
+	uint64_t write_offset; /* parallel_restore_bo_add() always sets this to 0 */
+	uint64_t size;
+} parallel_restore_entry;
+
+typedef struct {
+	parallel_restore_cmd_head cmd_head;
+	int *fds_write;			 /* cmd_head.fd_write_num dmabuf fds */
+	parallel_gpu_info *gpu_ids;	 /* cmd_head.gpu_num records */
+	parallel_restore_entry *entries; /* cmd_head.entry_num records */
+} parallel_restore_cmd;
+
+/*
+ * For parallel_restore, a background thread in the main CRIU process is used to restore the GPU
+ * buffer object. However, initially, the ownership of these buffer objects and the metadata for
+ * restoration are all with the target process. Therefore, we introduce a series of functions to
+ * help the target process send these tasks to the main CRIU process.
+ */
+int init_parallel_restore_cmd(int num, int id, int gpu_num, parallel_restore_cmd *restore_cmd);
+
+void free_parallel_restore_cmd(parallel_restore_cmd *restore_cmd);
+
+int install_parallel_sock(void);
+
+int send_parallel_restore_cmd(parallel_restore_cmd *restore_cmd);
+
+int recv_parallel_restore_cmd(parallel_restore_cmd *restore_cmd);
+
+void parallel_restore_bo_add(int dmabuf_fd, int gpu_id, uint64_t size, uint64_t offset,
+			     parallel_restore_cmd *restore_cmd);
+
+void parallel_restore_gpu_id_add(int gpu_id, int minor, parallel_restore_cmd *restore_cmd);
+
+int close_parallel_restore_server(void);
+
+#endif
\ No newline at end of file
diff --git a/plugins/amdgpu/criu-amdgpu.proto b/plugins/amdgpu/criu-amdgpu.proto
index 81d00d3ff..7682a8f21 100644
--- a/plugins/amdgpu/criu-amdgpu.proto
+++ b/plugins/amdgpu/criu-amdgpu.proto
@@ -5,7 +5,7 @@ message dev_iolink {
 	required uint32 node_to_id = 2;
 }
 
-message device_entry {
+message kfd_device_entry {
 	required uint32 node_id = 1;
 	required uint32 gpu_id = 2;
 	required uint32 cpu_cores_count = 3;
@@ -40,27 +40,56 @@ message device_entry {
 	repeated dev_iolink iolinks = 32;
 }
 
-message bo_entry {
-	required uint64	addr = 1;
-	required uint64	size = 2;
-	required uint64	offset = 3;
+message kfd_bo_entry {
+	required uint64 addr = 1;
+	required uint64 size = 2;
+	required uint64 offset = 3;
 	required uint32 alloc_flags = 4;
 	required uint32 gpu_id = 5;
+	required uint32 handle = 6;
 }
 
 message criu_kfd {
 	required uint32 pid = 1;
 	required uint32 num_of_gpus = 2;
 	required uint32 num_of_cpus = 3;
-	repeated device_entry device_entries = 4;
-	required uint64	num_of_bos = 5;
-	repeated bo_entry bo_entries = 6;
-	required uint32	num_of_objects = 7;
+	repeated kfd_device_entry device_entries = 4;
+	required uint64 num_of_bos = 5;
+	repeated kfd_bo_entry bo_entries = 6;
+	required uint32 num_of_objects = 7;
 	required uint64 shared_mem_size = 8;
 	required uint32 shared_mem_magic = 9;
 	required bytes priv_data = 10;
 }
 
+message drm_bo_entry {
+	required uint64 addr = 1;
+	required uint64 size = 2;
+	required uint64 offset = 3;
+	required uint64 alloc_flags = 4;
+	required uint64 alignment = 5;
+	required uint32 preferred_domains = 6;
+	required uint32 handle = 7;
+	required uint32 is_import = 8;
+	required uint32 num_of_vms = 9;
+	repeated drm_vm_entry vm_entries = 10;
+}
+
+message drm_vm_entry {
+	required uint64 addr = 1;
+	required uint64 size = 2;
+	required uint64 offset = 3;
+	required uint64 flags = 4;
+}
+
 message criu_render_node {
 	required uint32 gpu_id = 1;
+	required uint32 id = 2;
+	required uint32 drm_render_minor = 3;
+	required uint64 num_of_bos = 4;
+	repeated drm_bo_entry bo_entries = 5;
+}
+
+message criu_dmabuf_node {
+	required uint32 gem_handle = 1;
 }
diff --git a/plugins/amdgpu/drm.h b/plugins/amdgpu/drm.h
new file mode 100644
index 000000000..3cd5cf15e
--- /dev/null
+++ b/plugins/amdgpu/drm.h
@@ -0,0 +1,1476 @@
+/*
+ * Header for the Direct Rendering Manager
+ *
+ * Author: Rickard E. (Rik) Faith <faith@valinux.com>
+ *
+ * Acknowledgments:
+ * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic cmpxchg.
+ */
+
+/*
+ * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
+ * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DRM_H_
+#define _DRM_H_
+
+#if defined(__KERNEL__)
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+typedef unsigned int drm_handle_t;
+
+#elif defined(__linux__)
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+typedef unsigned int drm_handle_t;
+
+#else /* One of the BSDs */
+
+#include <stdint.h>
+#include <sys/ioccom.h>
+#include <sys/types.h>
+typedef int8_t   __s8;
+typedef uint8_t  __u8;
+typedef int16_t  __s16;
+typedef uint16_t __u16;
+typedef int32_t  __s32;
+typedef uint32_t __u32;
+typedef int64_t  __s64;
+typedef uint64_t __u64;
+typedef size_t   __kernel_size_t;
+typedef unsigned long drm_handle_t;
+
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_NAME	"drm"	  /**< Name in kernel, /dev, and /proc */
+#define DRM_MIN_ORDER	5	  /**< At least 2^5 bytes = 32 bytes */
+#define DRM_MAX_ORDER	22	  /**< Up to 2^22 bytes = 4MB */
+#define DRM_RAM_PERCENT 10	  /**< How much system ram can we lock? */
+
+#define _DRM_LOCK_HELD	0x80000000U /**< Hardware lock is held */
+#define _DRM_LOCK_CONT	0x40000000U /**< Hardware lock is contended */
+#define _DRM_LOCK_IS_HELD(lock)	   ((lock) & _DRM_LOCK_HELD)
+#define _DRM_LOCK_IS_CONT(lock)	   ((lock) & _DRM_LOCK_CONT)
+#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT))
+
+typedef unsigned int drm_context_t;
+typedef unsigned int drm_drawable_t;
+typedef unsigned int drm_magic_t;
+
+/*
+ * Cliprect.
+ *
+ * \warning: If you change this structure, make sure you change
+ * XF86DRIClipRectRec in the server as well
+ *
+ * \note KW: Actually it's illegal to change either for
+ * backwards-compatibility reasons.
+ */
+struct drm_clip_rect {
+	unsigned short x1;
+	unsigned short y1;
+	unsigned short x2;
+	unsigned short y2;
+};
+
+/*
+ * Drawable information.
+ */
+struct drm_drawable_info {
+	unsigned int num_rects;
+	struct drm_clip_rect *rects;
+};
+
+/*
+ * Texture region,
+ */
+struct drm_tex_region {
+	unsigned char next;
+	unsigned char prev;
+	unsigned char in_use;
+	unsigned char padding;
+	unsigned int age;
+};
+
+/*
+ * Hardware lock.
+ *
+ * The lock structure is a simple cache-line aligned integer.  To avoid
+ * processor bus contention on a multiprocessor system, there should not be any
+ * other data stored in the same cache line.
+ */
+struct drm_hw_lock {
+	__volatile__ unsigned int lock;		/**< lock variable */
+	char padding[60];			/**< Pad to cache line */
+};
+
+/*
+ * DRM_IOCTL_VERSION ioctl argument type.
+ *
+ * \sa drmGetVersion().
+ */
+struct drm_version {
+	int version_major;	  /**< Major version */
+	int version_minor;	  /**< Minor version */
+	int version_patchlevel;	  /**< Patch level */
+	__kernel_size_t name_len;	  /**< Length of name buffer */
+	char __user *name;	  /**< Name of driver */
+	__kernel_size_t date_len;	  /**< Length of date buffer */
+	char __user *date;	  /**< User-space buffer to hold date */
+	__kernel_size_t desc_len;	  /**< Length of desc buffer */
+	char __user *desc;	  /**< User-space buffer to hold desc */
+};
+
+/*
+ * DRM_IOCTL_GET_UNIQUE ioctl argument type.
+ *
+ * \sa drmGetBusid() and drmSetBusId().
+ */
+struct drm_unique {
+	__kernel_size_t unique_len;	  /**< Length of unique */
+	char __user *unique;	  /**< Unique name for driver instantiation */
+};
+
+struct drm_list {
+	int count;		  /**< Length of user-space structures */
+	struct drm_version __user *version;
+};
+
+struct drm_block {
+	int unused;
+};
+
+/*
+ * DRM_IOCTL_CONTROL ioctl argument type.
+ *
+ * \sa drmCtlInstHandler() and drmCtlUninstHandler().
+ */
+struct drm_control {
+	enum {
+		DRM_ADD_COMMAND,
+		DRM_RM_COMMAND,
+		DRM_INST_HANDLER,
+		DRM_UNINST_HANDLER
+	} func;
+	int irq;
+};
+
+/*
+ * Type of memory to map.
+ */
+enum drm_map_type {
+	_DRM_FRAME_BUFFER = 0,	  /**< WC (no caching), no core dump */
+	_DRM_REGISTERS = 1,	  /**< no caching, no core dump */
+	_DRM_SHM = 2,		  /**< shared, cached */
+	_DRM_AGP = 3,		  /**< AGP/GART */
+	_DRM_SCATTER_GATHER = 4,  /**< Scatter/gather memory for PCI DMA */
+	_DRM_CONSISTENT = 5	  /**< Consistent memory for PCI DMA */
+};
+
+/*
+ * Memory mapping flags.
+ */
+enum drm_map_flags {
+	_DRM_RESTRICTED = 0x01,	     /**< Cannot be mapped to user-virtual */
+	_DRM_READ_ONLY = 0x02,
+	_DRM_LOCKED = 0x04,	     /**< shared, cached, locked */
+	_DRM_KERNEL = 0x08,	     /**< kernel requires access */
+	_DRM_WRITE_COMBINING = 0x10, /**< use write-combining if available */
+	_DRM_CONTAINS_LOCK = 0x20,   /**< SHM page that contains lock */
+	_DRM_REMOVABLE = 0x40,	     /**< Removable mapping */
+	_DRM_DRIVER = 0x80	     /**< Managed by driver */
+};
+
+struct drm_ctx_priv_map {
+	unsigned int ctx_id;	 /**< Context requesting private mapping */
+	void *handle;		 /**< Handle of map */
+};
+
+/*
+ * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls
+ * argument type.
+ *
+ * \sa drmAddMap().
+ */
+struct drm_map {
+	unsigned long offset;	 /**< Requested physical address (0 for SAREA)*/
+	unsigned long size;	 /**< Requested physical size (bytes) */
+	enum drm_map_type type;	 /**< Type of memory to map */
+	enum drm_map_flags flags;	 /**< Flags */
+	void *handle;		 /**< User-space: "Handle" to pass to mmap() */
+				 /**< Kernel-space: kernel-virtual address */
+	int mtrr;		 /**< MTRR slot used */
+	/*   Private data */
+};
+
+/*
+ * DRM_IOCTL_GET_CLIENT ioctl argument type.
+ */
+struct drm_client {
+	int idx;		/**< Which client desired? */
+	int auth;		/**< Is client authenticated? */
+	unsigned long pid;	/**< Process ID */
+	unsigned long uid;	/**< User ID */
+	unsigned long magic;	/**< Magic */
+	unsigned long iocs;	/**< Ioctl count */
+};
+
+enum drm_stat_type {
+	_DRM_STAT_LOCK,
+	_DRM_STAT_OPENS,
+	_DRM_STAT_CLOSES,
+	_DRM_STAT_IOCTLS,
+	_DRM_STAT_LOCKS,
+	_DRM_STAT_UNLOCKS,
+	_DRM_STAT_VALUE,	/**< Generic value */
+	_DRM_STAT_BYTE,		/**< Generic byte counter (1024bytes/K) */
+	_DRM_STAT_COUNT,	/**< Generic non-byte counter (1000/k) */
+
+	_DRM_STAT_IRQ,		/**< IRQ */
+	_DRM_STAT_PRIMARY,	/**< Primary DMA bytes */
+	_DRM_STAT_SECONDARY,	/**< Secondary DMA bytes */
+	_DRM_STAT_DMA,		/**< DMA */
+	_DRM_STAT_SPECIAL,	/**< Special DMA (e.g., priority or polled) */
+	_DRM_STAT_MISSED	/**< Missed DMA opportunity */
+	    /* Add to the *END* of the list */
+};
+
+/*
+ * DRM_IOCTL_GET_STATS ioctl argument type.
+ */
+struct drm_stats {
+	unsigned long count;
+	struct {
+		unsigned long value;
+		enum drm_stat_type type;
+	} data[15];
+};
+
+/*
+ * Hardware locking flags.
+ */
+enum drm_lock_flags {
+	_DRM_LOCK_READY = 0x01,	     /**< Wait until hardware is ready for DMA */
+	_DRM_LOCK_QUIESCENT = 0x02,  /**< Wait until hardware quiescent */
+	_DRM_LOCK_FLUSH = 0x04,	     /**< Flush this context's DMA queue first */
+	_DRM_LOCK_FLUSH_ALL = 0x08,  /**< Flush all DMA queues first */
+	/* These *HALT* flags aren't supported yet
+	   -- they will be used to support the
+	   full-screen DGA-like mode. */
+	_DRM_HALT_ALL_QUEUES = 0x10, /**< Halt all current and future queues */
+	_DRM_HALT_CUR_QUEUES = 0x20  /**< Halt all current queues */
+};
+
+/*
+ * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type.
+ *
+ * \sa drmGetLock() and drmUnlock().
+ */
+struct drm_lock {
+	int context;
+	enum drm_lock_flags flags;
+};
+
+/*
+ * DMA flags
+ *
+ * \warning
+ * These values \e must match xf86drm.h.
+ *
+ * \sa drm_dma.
+ */
+enum drm_dma_flags {
+	/* Flags for DMA buffer dispatch */
+	_DRM_DMA_BLOCK = 0x01,	      /**<
+				       * Block until buffer dispatched.
+				       *
+				       * \note The buffer may not yet have
+				       * been processed by the hardware --
+				       * getting a hardware lock with the
+				       * hardware quiescent will ensure
+				       * that the buffer has been
+				       * processed.
+				       */
+	_DRM_DMA_WHILE_LOCKED = 0x02, /**< Dispatch while lock held */
+	_DRM_DMA_PRIORITY = 0x04,     /**< High priority dispatch */
+
+	/* Flags for DMA buffer request */
+	_DRM_DMA_WAIT = 0x10,	      /**< Wait for free buffers */
+	_DRM_DMA_SMALLER_OK = 0x20,   /**< Smaller-than-requested buffers OK */
+	_DRM_DMA_LARGER_OK = 0x40     /**< Larger-than-requested buffers OK */
+};
+
+/*
+ * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type.
+ *
+ * \sa drmAddBufs().
+ */
+struct drm_buf_desc {
+	int count;		 /**< Number of buffers of this size */
+	int size;		 /**< Size in bytes */
+	int low_mark;		 /**< Low water mark */
+	int high_mark;		 /**< High water mark */
+	enum {
+		_DRM_PAGE_ALIGN = 0x01,	/**< Align on page boundaries for DMA */
+		_DRM_AGP_BUFFER = 0x02,	/**< Buffer is in AGP space */
+		_DRM_SG_BUFFER = 0x04,	/**< Scatter/gather memory buffer */
+		_DRM_FB_BUFFER = 0x08,	/**< Buffer is in frame buffer */
+		_DRM_PCI_BUFFER_RO = 0x10 /**< Map PCI DMA buffer read-only */
+	} flags;
+	unsigned long agp_start; /**<
+				  * Start address of where the AGP buffers are
+				  * in the AGP aperture
+				  */
+};
+
+/*
+ * DRM_IOCTL_INFO_BUFS ioctl argument type.
+ */
+struct drm_buf_info {
+	int count;		/**< Entries in list */
+	struct drm_buf_desc __user *list;
+};
+
+/*
+ * DRM_IOCTL_FREE_BUFS ioctl argument type.
+ */
+struct drm_buf_free {
+	int count;
+	int __user *list;
+};
+
+/*
+ * Buffer information
+ *
+ * \sa drm_buf_map.
+ */
+struct drm_buf_pub {
+	int idx;		       /**< Index into the master buffer list */
+	int total;		       /**< Buffer size */
+	int used;		       /**< Amount of buffer in use (for DMA) */
+	void __user *address;	       /**< Address of buffer */
+};
+
+/*
+ * DRM_IOCTL_MAP_BUFS ioctl argument type.
+ */
+struct drm_buf_map {
+	int count;		/**< Length of the buffer list */
+#ifdef __cplusplus
+	void __user *virt;
+#else
+	void __user *virtual;		/**< Mmap'd area in user-virtual */
+#endif
+	struct drm_buf_pub __user *list;	/**< Buffer information */
+};
+
+/*
+ * DRM_IOCTL_DMA ioctl argument type.
+ *
+ * Indices here refer to the offset into the buffer list in drm_buf_get.
+ *
+ * \sa drmDMA().
+ */
+struct drm_dma {
+	int context;			  /**< Context handle */
+	int send_count;			  /**< Number of buffers to send */
+	int __user *send_indices;	  /**< List of handles to buffers */
+	int __user *send_sizes;		  /**< Lengths of data to send */
+	enum drm_dma_flags flags;	  /**< Flags */
+	int request_count;		  /**< Number of buffers requested */
+	int request_size;		  /**< Desired size for buffers */
+	int __user *request_indices;	  /**< Buffer information */
+	int __user *request_sizes;
+	int granted_count;		  /**< Number of buffers granted */
+};
+
+enum drm_ctx_flags {
+	_DRM_CONTEXT_PRESERVED = 0x01,
+	_DRM_CONTEXT_2DONLY = 0x02
+};
+
+/*
+ * DRM_IOCTL_ADD_CTX ioctl argument type.
+ *
+ * \sa drmCreateContext() and drmDestroyContext().
+ */
+struct drm_ctx {
+	drm_context_t handle;
+	enum drm_ctx_flags flags;
+};
+
+/*
+ * DRM_IOCTL_RES_CTX ioctl argument type.
+ */
+struct drm_ctx_res {
+	int count;
+	struct drm_ctx __user *contexts;
+};
+
+/*
+ * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type.
+ */
+struct drm_draw {
+	drm_drawable_t handle;
+};
+
+/*
+ * DRM_IOCTL_UPDATE_DRAW ioctl argument type.
+ */
+typedef enum {
+	DRM_DRAWABLE_CLIPRECTS
+} drm_drawable_info_type_t;
+
+struct drm_update_draw {
+	drm_drawable_t handle;
+	unsigned int type;
+	unsigned int num;
+	unsigned long long data;
+};
+
+/*
+ * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type.
+ */
+struct drm_auth {
+	drm_magic_t magic;
+};
+
+/*
+ * DRM_IOCTL_IRQ_BUSID ioctl argument type.
+ *
+ * \sa drmGetInterruptFromBusID().
+ */
+struct drm_irq_busid {
+	int irq;	/**< IRQ number */
+	int busnum;	/**< bus number */
+	int devnum;	/**< device number */
+	int funcnum;	/**< function number */
+};
+
+enum drm_vblank_seq_type {
+	_DRM_VBLANK_ABSOLUTE = 0x0,	/**< Wait for specific vblank sequence number */
+	_DRM_VBLANK_RELATIVE = 0x1,	/**< Wait for given number of vblanks */
+	/* bits 1-6 are reserved for high crtcs */
+	_DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e,
+	_DRM_VBLANK_EVENT = 0x4000000,   /**< Send event instead of blocking */
+	_DRM_VBLANK_FLIP = 0x8000000,   /**< Scheduled buffer swap should flip */
+	_DRM_VBLANK_NEXTONMISS = 0x10000000,	/**< If missed, wait for next vblank */
+	_DRM_VBLANK_SECONDARY = 0x20000000,	/**< Secondary display controller */
+	_DRM_VBLANK_SIGNAL = 0x40000000	/**< Send signal instead of blocking, unsupported */
+};
+#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1
+
+#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE)
+#define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \
+				_DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS)
+
+struct drm_wait_vblank_request {
+	enum drm_vblank_seq_type type;
+	unsigned int sequence;
+	unsigned long signal;
+};
+
+struct drm_wait_vblank_reply {
+	enum drm_vblank_seq_type type;
+	unsigned int sequence;
+	long tval_sec;
+	long tval_usec;
+};
+
+/*
+ * DRM_IOCTL_WAIT_VBLANK ioctl argument type.
+ *
+ * \sa drmWaitVBlank().
+ */
+union drm_wait_vblank {
+	struct drm_wait_vblank_request request;
+	struct drm_wait_vblank_reply reply;
+};
+
+#define _DRM_PRE_MODESET 1
+#define _DRM_POST_MODESET 2
+
+/*
+ * DRM_IOCTL_MODESET_CTL ioctl argument type
+ *
+ * \sa drmModesetCtl().
+ */
+struct drm_modeset_ctl {
+	__u32 crtc;
+	__u32 cmd;
+};
+
+/*
+ * DRM_IOCTL_AGP_ENABLE ioctl argument type.
+ *
+ * \sa drmAgpEnable().
+ */
+struct drm_agp_mode {
+	unsigned long mode;	/**< AGP mode */
+};
+
+/*
+ * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type.
+ *
+ * \sa drmAgpAlloc() and drmAgpFree().
+ */
+struct drm_agp_buffer {
+	unsigned long size;	/**< In bytes -- will round to page boundary */
+	unsigned long handle;	/**< Used for binding / unbinding */
+	unsigned long type;	/**< Type of memory to allocate */
+	unsigned long physical;	/**< Physical used by i810 */
+};
+
+/*
+ * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type.
+ *
+ * \sa drmAgpBind() and drmAgpUnbind().
+ */
+struct drm_agp_binding {
+	unsigned long handle;	/**< From drm_agp_buffer */
+	unsigned long offset;	/**< In bytes -- will round to page boundary */
+};
+
+/*
+ * DRM_IOCTL_AGP_INFO ioctl argument type.
+ *
+ * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(),
+ * drmAgpBase(), drmAgpSize(), drmAgpMemoryUsed(), drmAgpMemoryAvail(),
+ * drmAgpVendorId() and drmAgpDeviceId().
+ */
+struct drm_agp_info {
+	int agp_version_major;
+	int agp_version_minor;
+	unsigned long mode;
+	unsigned long aperture_base;	/* physical address */
+	unsigned long aperture_size;	/* bytes */
+	unsigned long memory_allowed;	/* bytes */
+	unsigned long memory_used;
+
+	/* PCI information */
+	unsigned short id_vendor;
+	unsigned short id_device;
+};
+
+/*
+ * DRM_IOCTL_SG_ALLOC ioctl argument type.
+ */
+struct drm_scatter_gather {
+	unsigned long size;	/**< In bytes -- will round to page boundary */
+	unsigned long handle;	/**< Used for mapping / unmapping */
+};
+
+/*
+ * DRM_IOCTL_SET_VERSION ioctl argument type.
+ */
+struct drm_set_version {
+	int drm_di_major;
+	int drm_di_minor;
+	int drm_dd_major;
+	int drm_dd_minor;
+};
+
+/**
+ * struct drm_gem_close - Argument for &DRM_IOCTL_GEM_CLOSE ioctl.
+ * @handle: Handle of the object to be closed.
+ * @pad: Padding.
+ *
+ * Releases the handle to an mm object.
+ */
+struct drm_gem_close {
+	__u32 handle;
+	__u32 pad;
+};
+
+/**
+ * struct drm_gem_flink - Argument for &DRM_IOCTL_GEM_FLINK ioctl.
+ * @handle: Handle for the object being named.
+ * @name: Returned global name.
+ *
+ * Create a global name for an object, returning the name.
+ *
+ * Note that the name does not hold a reference; when the object
+ * is freed, the name goes away.
+ */
+struct drm_gem_flink {
+	__u32 handle;
+	__u32 name;
+};
+
+/**
+ * struct drm_gem_open - Argument for &DRM_IOCTL_GEM_OPEN ioctl.
+ * @name: Name of object being opened.
+ * @handle: Returned handle for the object.
+ * @size: Returned size of the object
+ *
+ * Open an object using the global name, returning a handle and the size.
+ *
+ * This handle (of course) holds a reference to the object, so the object
+ * will not go away until the handle is deleted.
+ */
+struct drm_gem_open {
+	__u32 name;
+	__u32 handle;
+	__u64 size;
+};
+
+/**
+ * struct drm_gem_change_handle - Argument for &DRM_IOCTL_GEM_CHANGE_HANDLE ioctl.
+ * @handle: The handle of a gem object.
+ * @new_handle: An available gem handle.
+ *
+ * This ioctl changes the handle of a GEM object to the specified one.
+ * The new handle must be unused. On success the old handle is closed
+ * and all further IOCTL should refer to the new handle only.
+ * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle.
+ */
+struct drm_gem_change_handle {
+	__u32 handle;
+	__u32 new_handle;
+};
+
+/**
+ * DRM_CAP_DUMB_BUFFER
+ *
+ * If set to 1, the driver supports creating dumb buffers via the
+ * &DRM_IOCTL_MODE_CREATE_DUMB ioctl.
+ */
+#define DRM_CAP_DUMB_BUFFER		0x1
+/**
+ * DRM_CAP_VBLANK_HIGH_CRTC
+ *
+ * If set to 1, the kernel supports specifying a :ref:`CRTC index<crtc_index>`
+ * in the high bits of &drm_wait_vblank_request.type.
+ *
+ * Starting kernel version 2.6.39, this capability is always set to 1.
+ */
+#define DRM_CAP_VBLANK_HIGH_CRTC	0x2
+/**
+ * DRM_CAP_DUMB_PREFERRED_DEPTH
+ *
+ * The preferred bit depth for dumb buffers.
+ *
+ * The bit depth is the number of bits used to indicate the color of a single
+ * pixel excluding any padding. This is different from the number of bits per
+ * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per
+ * pixel.
+ *
+ * Note that this preference only applies to dumb buffers, it's irrelevant for
+ * other types of buffers.
+ */
+#define DRM_CAP_DUMB_PREFERRED_DEPTH	0x3
+/**
+ * DRM_CAP_DUMB_PREFER_SHADOW
+ *
+ * If set to 1, the driver prefers userspace to render to a shadow buffer
+ * instead of directly rendering to a dumb buffer. For best speed, userspace
+ * should do streaming ordered memory copies into the dumb buffer and never
+ * read from it.
+ *
+ * Note that this preference only applies to dumb buffers, it's irrelevant for
+ * other types of buffers.
+ */
+#define DRM_CAP_DUMB_PREFER_SHADOW	0x4
+/**
+ * DRM_CAP_PRIME
+ *
+ * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT
+ * and &DRM_PRIME_CAP_EXPORT.
+ *
+ * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and
+ * &DRM_PRIME_CAP_EXPORT are always advertised.
+ *
+ * PRIME buffers are exposed as dma-buf file descriptors.
+ * See :ref:`prime_buffer_sharing`.
+ */
+#define DRM_CAP_PRIME			0x5
+/**
+ * DRM_PRIME_CAP_IMPORT
+ *
+ * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME
+ * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl.
+ *
+ * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME.
+ */
+#define  DRM_PRIME_CAP_IMPORT		0x1
+/**
+ * DRM_PRIME_CAP_EXPORT
+ *
+ * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME
+ * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl.
+ *
+ * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME.
+ */
+#define  DRM_PRIME_CAP_EXPORT		0x2
+/**
+ * DRM_CAP_TIMESTAMP_MONOTONIC
+ *
+ * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in
+ * struct drm_event_vblank. If set to 1, the kernel will report timestamps with
+ * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these
+ * clocks.
+ *
+ * Starting from kernel version 2.6.39, the default value for this capability
+ * is 1. Starting kernel version 4.15, this capability is always set to 1.
+ */
+#define DRM_CAP_TIMESTAMP_MONOTONIC	0x6
+/**
+ * DRM_CAP_ASYNC_PAGE_FLIP
+ *
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy
+ * page-flips.
+ */
+#define DRM_CAP_ASYNC_PAGE_FLIP		0x7
+/**
+ * DRM_CAP_CURSOR_WIDTH
+ *
+ * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid
+ * width x height combination for the hardware cursor. The intention is that a
+ * hardware agnostic userspace can query a cursor plane size to use.
+ *
+ * Note that the cross-driver contract is to merely return a valid size;
+ * drivers are free to attach another meaning on top, eg. i915 returns the
+ * maximum plane size.
+ */
+#define DRM_CAP_CURSOR_WIDTH		0x8
+/**
+ * DRM_CAP_CURSOR_HEIGHT
+ *
+ * See &DRM_CAP_CURSOR_WIDTH.
+ */
+#define DRM_CAP_CURSOR_HEIGHT		0x9
+/**
+ * DRM_CAP_ADDFB2_MODIFIERS
+ *
+ * If set to 1, the driver supports supplying modifiers in the
+ * &DRM_IOCTL_MODE_ADDFB2 ioctl.
+ */
+#define DRM_CAP_ADDFB2_MODIFIERS	0x10
+/**
+ * DRM_CAP_PAGE_FLIP_TARGET
+ *
+ * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and
+ * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in
+ * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP
+ * ioctl.
+ */
+#define DRM_CAP_PAGE_FLIP_TARGET	0x11
+/**
+ * DRM_CAP_CRTC_IN_VBLANK_EVENT
+ *
+ * If set to 1, the kernel supports reporting the CRTC ID in
+ * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and
+ * &DRM_EVENT_FLIP_COMPLETE events.
+ *
+ * Starting kernel version 4.12, this capability is always set to 1.
+ */
+#define DRM_CAP_CRTC_IN_VBLANK_EVENT	0x12
+/**
+ * DRM_CAP_SYNCOBJ
+ *
+ * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`.
+ */
+#define DRM_CAP_SYNCOBJ		0x13
+/**
+ * DRM_CAP_SYNCOBJ_TIMELINE
+ *
+ * If set to 1, the driver supports timeline operations on sync objects. See
+ * :ref:`drm_sync_objects`.
+ */
+#define DRM_CAP_SYNCOBJ_TIMELINE	0x14
+/**
+ * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP
+ *
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic
+ * commits.
+ */
+#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP	0x15
+
+/* DRM_IOCTL_GET_CAP ioctl argument type */
+struct drm_get_cap {
+	__u64 capability;	/* DRM_CAP_* identifier being queried */
+	__u64 value;	/* value of that capability, e.g. 1 if supported, a size or a bitfield */
+};
+
+/**
+ * DRM_CLIENT_CAP_STEREO_3D
+ *
+ * If set to 1, the DRM core will expose the stereo 3D capabilities of the
+ * monitor by advertising the supported 3D layouts in the flags of struct
+ * drm_mode_modeinfo. See ``DRM_MODE_FLAG_3D_*``.
+ *
+ * This capability is always supported for all drivers starting from kernel
+ * version 3.13.
+ */
+#define DRM_CLIENT_CAP_STEREO_3D	1
+
+/**
+ * DRM_CLIENT_CAP_UNIVERSAL_PLANES
+ *
+ * If set to 1, the DRM core will expose all planes (overlay, primary, and
+ * cursor) to userspace.
+ *
+ * This capability has been introduced in kernel version 3.15. Starting from
+ * kernel version 3.17, this capability is always supported for all drivers.
+ */
+#define DRM_CLIENT_CAP_UNIVERSAL_PLANES  2
+
+/**
+ * DRM_CLIENT_CAP_ATOMIC
+ *
+ * If set to 1, the DRM core will expose atomic properties to userspace. This
+ * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and
+ * &DRM_CLIENT_CAP_ASPECT_RATIO.
+ *
+ * If the driver doesn't support atomic mode-setting, enabling this capability
+ * will fail with -EOPNOTSUPP.
+ *
+ * This capability has been introduced in kernel version 4.0. Starting from
+ * kernel version 4.2, this capability is always supported for atomic-capable
+ * drivers.
+ */
+#define DRM_CLIENT_CAP_ATOMIC	3
+
+/**
+ * DRM_CLIENT_CAP_ASPECT_RATIO
+ *
+ * If set to 1, the DRM core will provide aspect ratio information in modes.
+ * See ``DRM_MODE_FLAG_PIC_AR_*``.
+ *
+ * This capability is always supported for all drivers starting from kernel
+ * version 4.18.
+ */
+#define DRM_CLIENT_CAP_ASPECT_RATIO    4
+
+/**
+ * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS
+ *
+ * If set to 1, the DRM core will expose special connectors to be used for
+ * writing back to memory the scene setup in the commit. The client must enable
+ * &DRM_CLIENT_CAP_ATOMIC first.
+ *
+ * This capability is always supported for atomic-capable drivers starting from
+ * kernel version 4.19.
+ */
+#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS	5
+
+/**
+ * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT
+ *
+ * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and
+ * virtualbox) have additional restrictions for cursor planes (thus
+ * making cursor planes on those drivers not truly universal,) e.g.
+ * they need cursor planes to act like one would expect from a mouse
+ * cursor and have correctly set hotspot properties.
+ * If this client cap is not set the DRM core will hide cursor plane on
+ * those virtualized drivers because not setting it implies that the
+ * client is not capable of dealing with those extra restrictions.
+ * Clients which do set cursor hotspot and treat the cursor plane
+ * like a mouse cursor should set this property.
+ * The client must enable &DRM_CLIENT_CAP_ATOMIC first.
+ *
+ * Setting this property on drivers which do not special case
+ * cursor planes (i.e. non-virtualized drivers) will return
+ * EOPNOTSUPP, which can be used by userspace to gauge
+ * requirements of the hardware/drivers they're running on.
+ *
+ * This capability is always supported for atomic-capable virtualized
+ * drivers starting from kernel version 6.6.
+ */
+#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT	6
+
+/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
+struct drm_set_client_cap {
+	__u64 capability;	/* DRM_CLIENT_CAP_* identifier to set */
+	__u64 value;	/* requested value; the DRM_CLIENT_CAP_* descriptions cover setting it to 1 */
+};
+
+#define DRM_RDWR O_RDWR
+#define DRM_CLOEXEC O_CLOEXEC
+struct drm_prime_handle {
+	__u32 handle;	/* GEM handle: input for HANDLE_TO_FD, output of FD_TO_HANDLE */
+
+	/** Flags, only applicable for handle->fd; presumably DRM_RDWR / DRM_CLOEXEC */
+	__u32 flags;
+
+	/** dmabuf file descriptor: returned by HANDLE_TO_FD, supplied to FD_TO_HANDLE */
+	__s32 fd;
+};
+
+struct drm_syncobj_create {
+	__u32 handle;
+#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
+	__u32 flags;
+};
+
+struct drm_syncobj_destroy {
+	__u32 handle;
+	__u32 pad;
+};
+
+#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0)
+#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE         (1 << 1)
+#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0)
+#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE         (1 << 1)
+struct drm_syncobj_handle {
+	__u32 handle;
+	__u32 flags;
+
+	__s32 fd;
+	__u32 pad;
+
+	__u64 point;
+};
+
+struct drm_syncobj_transfer {
+	__u32 src_handle;
+	__u32 dst_handle;
+	__u64 src_point;
+	__u64 dst_point;
+	__u32 flags;
+	__u32 pad;
+};
+
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */
+struct drm_syncobj_wait {
+	__u64 handles;
+	/* absolute timeout */
+	__s64 timeout_nsec;
+	__u32 count_handles;
+	__u32 flags;
+	__u32 first_signaled; /* only valid when not waiting all */
+	__u32 pad;
+	/**
+	 * @deadline_nsec - fence deadline hint
+	 *
+	 * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
+	 * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
+	 * set.
+	 */
+	__u64 deadline_nsec;
+};
+
+struct drm_syncobj_timeline_wait {
+	__u64 handles;	/* NOTE(review): presumably a user pointer to count_handles syncobj handles -- confirm */
+	/* timeline point to wait on for each handle */
+	__u64 points;
+	/* absolute timeout */
+	__s64 timeout_nsec;
+	__u32 count_handles;
+	__u32 flags;	/* DRM_SYNCOBJ_WAIT_FLAGS_* */
+	__u32 first_signaled; /* only valid when not waiting all */
+	__u32 pad;
+	/**
+	 * @deadline_nsec - fence deadline hint
+	 *
+	 * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
+	 * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
+	 * set.
+	 */
+	__u64 deadline_nsec;
+};
+
+/**
+ * struct drm_syncobj_eventfd
+ * @handle: syncobj handle.
+ * @flags: Zero to wait for the point to be signalled, or
+ *         &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be
+ *         available for the point.
+ * @point: syncobj timeline point (set to zero for binary syncobjs).
+ * @fd: Existing eventfd to send events to.
+ * @pad: Must be zero.
+ *
+ * Register an eventfd to be signalled by a syncobj. The eventfd counter will
+ * be incremented by one.
+ */
+struct drm_syncobj_eventfd {
+	__u32 handle;
+	__u32 flags;
+	__u64 point;
+	__s32 fd;
+	__u32 pad;
+};
+
+
+struct drm_syncobj_array {
+	__u64 handles;
+	__u32 count_handles;
+	__u32 pad;
+};
+
+#define DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED (1 << 0) /* last available point on timeline syncobj */
+struct drm_syncobj_timeline_array {
+	__u64 handles;
+	__u64 points;
+	__u32 count_handles;
+	__u32 flags;
+};
+
+
+/* Query current scanout sequence number */
+struct drm_crtc_get_sequence {
+	__u32 crtc_id;		/* requested crtc_id */
+	__u32 active;		/* return: crtc output is active */
+	__u64 sequence;		/* return: most recent vblank sequence */
+	__s64 sequence_ns;	/* return: most recent time of first pixel out */
+};
+
+/* Queue event to be delivered at specified sequence. Time stamp marks
+ * when the first pixel of the refresh cycle leaves the display engine
+ * for the display
+ */
+#define DRM_CRTC_SEQUENCE_RELATIVE		0x00000001	/* sequence is relative to current */
+#define DRM_CRTC_SEQUENCE_NEXT_ON_MISS		0x00000002	/* Use next sequence if we've missed */
+
+struct drm_crtc_queue_sequence {
+	__u32 crtc_id;
+	__u32 flags;
+	__u64 sequence;		/* on input, target sequence. on output, actual sequence */
+	__u64 user_data;	/* user data passed to event */
+};
+
+#define DRM_CLIENT_NAME_MAX_LEN		64
+struct drm_set_client_name {
+	__u64 name_len;	/* name length without the terminating NUL; must be <= DRM_CLIENT_NAME_MAX_LEN */
+	__u64 name;	/* NOTE(review): presumably a user-space pointer to the name -- confirm */
+};
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "drm_mode.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_IOCTL_BASE			'd'	/* first argument handed to _IO*() by every DRM_IO* wrapper below */
+#define DRM_IO(nr)			_IO(DRM_IOCTL_BASE,nr)	/* ioctl number nr, no argument type */
+#define DRM_IOR(nr,type)		_IOR(DRM_IOCTL_BASE,nr,type)	/* ioctl number nr built with _IOR() for an argument of type */
+#define DRM_IOW(nr,type)		_IOW(DRM_IOCTL_BASE,nr,type)	/* ioctl number nr built with _IOW() for an argument of type */
+#define DRM_IOWR(nr,type)		_IOWR(DRM_IOCTL_BASE,nr,type)	/* ioctl number nr built with _IOWR() for an argument of type */
+
+#define DRM_IOCTL_VERSION		DRM_IOWR(0x00, struct drm_version)
+#define DRM_IOCTL_GET_UNIQUE		DRM_IOWR(0x01, struct drm_unique)
+#define DRM_IOCTL_GET_MAGIC		DRM_IOR( 0x02, struct drm_auth)
+#define DRM_IOCTL_IRQ_BUSID		DRM_IOWR(0x03, struct drm_irq_busid)
+#define DRM_IOCTL_GET_MAP               DRM_IOWR(0x04, struct drm_map)
+#define DRM_IOCTL_GET_CLIENT            DRM_IOWR(0x05, struct drm_client)
+#define DRM_IOCTL_GET_STATS             DRM_IOR( 0x06, struct drm_stats)
+#define DRM_IOCTL_SET_VERSION		DRM_IOWR(0x07, struct drm_set_version)
+#define DRM_IOCTL_MODESET_CTL           DRM_IOW(0x08, struct drm_modeset_ctl)
+/**
+ * DRM_IOCTL_GEM_CLOSE - Close a GEM handle.
+ *
+ * GEM handles are not reference-counted by the kernel. User-space is
+ * responsible for managing their lifetime. For example, if user-space imports
+ * the same memory object twice on the same DRM file description, the same GEM
+ * handle is returned by both imports, and user-space needs to ensure
+ * &DRM_IOCTL_GEM_CLOSE is performed once only. The same situation can happen
+ * when a memory object is allocated, then exported and imported again on the
+ * same DRM file description. The &DRM_IOCTL_MODE_GETFB2 IOCTL is an exception
+ * and always returns fresh new GEM handles even if an existing GEM handle
+ * already refers to the same memory object before the IOCTL is performed.
+ */
+#define DRM_IOCTL_GEM_CLOSE		DRM_IOW (0x09, struct drm_gem_close)
+#define DRM_IOCTL_GEM_FLINK		DRM_IOWR(0x0a, struct drm_gem_flink)
+#define DRM_IOCTL_GEM_OPEN		DRM_IOWR(0x0b, struct drm_gem_open)
+#define DRM_IOCTL_GET_CAP		DRM_IOWR(0x0c, struct drm_get_cap)
+#define DRM_IOCTL_SET_CLIENT_CAP	DRM_IOW( 0x0d, struct drm_set_client_cap)
+
+#define DRM_IOCTL_SET_UNIQUE		DRM_IOW( 0x10, struct drm_unique)
+#define DRM_IOCTL_AUTH_MAGIC		DRM_IOW( 0x11, struct drm_auth)
+#define DRM_IOCTL_BLOCK			DRM_IOWR(0x12, struct drm_block)
+#define DRM_IOCTL_UNBLOCK		DRM_IOWR(0x13, struct drm_block)
+#define DRM_IOCTL_CONTROL		DRM_IOW( 0x14, struct drm_control)
+#define DRM_IOCTL_ADD_MAP		DRM_IOWR(0x15, struct drm_map)
+#define DRM_IOCTL_ADD_BUFS		DRM_IOWR(0x16, struct drm_buf_desc)
+#define DRM_IOCTL_MARK_BUFS		DRM_IOW( 0x17, struct drm_buf_desc)
+#define DRM_IOCTL_INFO_BUFS		DRM_IOWR(0x18, struct drm_buf_info)
+#define DRM_IOCTL_MAP_BUFS		DRM_IOWR(0x19, struct drm_buf_map)
+#define DRM_IOCTL_FREE_BUFS		DRM_IOW( 0x1a, struct drm_buf_free)
+
+#define DRM_IOCTL_RM_MAP		DRM_IOW( 0x1b, struct drm_map)
+
+#define DRM_IOCTL_SET_SAREA_CTX		DRM_IOW( 0x1c, struct drm_ctx_priv_map)
+#define DRM_IOCTL_GET_SAREA_CTX 	DRM_IOWR(0x1d, struct drm_ctx_priv_map)
+
+#define DRM_IOCTL_SET_MASTER            DRM_IO(0x1e)
+#define DRM_IOCTL_DROP_MASTER           DRM_IO(0x1f)
+
+#define DRM_IOCTL_ADD_CTX		DRM_IOWR(0x20, struct drm_ctx)
+#define DRM_IOCTL_RM_CTX		DRM_IOWR(0x21, struct drm_ctx)
+#define DRM_IOCTL_MOD_CTX		DRM_IOW( 0x22, struct drm_ctx)
+#define DRM_IOCTL_GET_CTX		DRM_IOWR(0x23, struct drm_ctx)
+#define DRM_IOCTL_SWITCH_CTX		DRM_IOW( 0x24, struct drm_ctx)
+#define DRM_IOCTL_NEW_CTX		DRM_IOW( 0x25, struct drm_ctx)
+#define DRM_IOCTL_RES_CTX		DRM_IOWR(0x26, struct drm_ctx_res)
+#define DRM_IOCTL_ADD_DRAW		DRM_IOWR(0x27, struct drm_draw)
+#define DRM_IOCTL_RM_DRAW		DRM_IOWR(0x28, struct drm_draw)
+#define DRM_IOCTL_DMA			DRM_IOWR(0x29, struct drm_dma)
+#define DRM_IOCTL_LOCK			DRM_IOW( 0x2a, struct drm_lock)
+#define DRM_IOCTL_UNLOCK		DRM_IOW( 0x2b, struct drm_lock)
+#define DRM_IOCTL_FINISH		DRM_IOW( 0x2c, struct drm_lock)
+
+/**
+ * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD.
+ *
+ * User-space sets &drm_prime_handle.handle with the GEM handle to export and
+ * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in
+ * &drm_prime_handle.fd.
+ *
+ * The export can fail for any driver-specific reason, e.g. because export is
+ * not supported for this specific GEM handle (but might be for others).
+ *
+ * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT.
+ */
+#define DRM_IOCTL_PRIME_HANDLE_TO_FD    DRM_IOWR(0x2d, struct drm_prime_handle)
+/**
+ * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle.
+ *
+ * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to
+ * import, and gets back a GEM handle in &drm_prime_handle.handle.
+ * &drm_prime_handle.flags is unused.
+ *
+ * If an existing GEM handle refers to the memory object backing the DMA-BUF,
+ * that GEM handle is returned. Therefore user-space which needs to handle
+ * arbitrary DMA-BUFs must have a user-space lookup data structure to manually
+ * reference-count duplicated GEM handles. For more information see
+ * &DRM_IOCTL_GEM_CLOSE.
+ *
+ * The import can fail for any driver-specific reason, e.g. because import is
+ * only supported for DMA-BUFs allocated on this DRM device.
+ *
+ * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT.
+ */
+#define DRM_IOCTL_PRIME_FD_TO_HANDLE    DRM_IOWR(0x2e, struct drm_prime_handle)
+
+#define DRM_IOCTL_AGP_ACQUIRE		DRM_IO(  0x30)
+#define DRM_IOCTL_AGP_RELEASE		DRM_IO(  0x31)
+#define DRM_IOCTL_AGP_ENABLE		DRM_IOW( 0x32, struct drm_agp_mode)
+#define DRM_IOCTL_AGP_INFO		DRM_IOR( 0x33, struct drm_agp_info)
+#define DRM_IOCTL_AGP_ALLOC		DRM_IOWR(0x34, struct drm_agp_buffer)
+#define DRM_IOCTL_AGP_FREE		DRM_IOW( 0x35, struct drm_agp_buffer)
+#define DRM_IOCTL_AGP_BIND		DRM_IOW( 0x36, struct drm_agp_binding)
+#define DRM_IOCTL_AGP_UNBIND		DRM_IOW( 0x37, struct drm_agp_binding)
+
+#define DRM_IOCTL_SG_ALLOC		DRM_IOWR(0x38, struct drm_scatter_gather)
+#define DRM_IOCTL_SG_FREE		DRM_IOW( 0x39, struct drm_scatter_gather)
+
+#define DRM_IOCTL_WAIT_VBLANK		DRM_IOWR(0x3a, union drm_wait_vblank)
+
+#define DRM_IOCTL_CRTC_GET_SEQUENCE	DRM_IOWR(0x3b, struct drm_crtc_get_sequence)
+#define DRM_IOCTL_CRTC_QUEUE_SEQUENCE	DRM_IOWR(0x3c, struct drm_crtc_queue_sequence)
+
+#define DRM_IOCTL_UPDATE_DRAW		DRM_IOW(0x3f, struct drm_update_draw)
+
+#define DRM_IOCTL_MODE_GETRESOURCES	DRM_IOWR(0xA0, struct drm_mode_card_res)
+#define DRM_IOCTL_MODE_GETCRTC		DRM_IOWR(0xA1, struct drm_mode_crtc)
+#define DRM_IOCTL_MODE_SETCRTC		DRM_IOWR(0xA2, struct drm_mode_crtc)
+#define DRM_IOCTL_MODE_CURSOR		DRM_IOWR(0xA3, struct drm_mode_cursor)
+#define DRM_IOCTL_MODE_GETGAMMA		DRM_IOWR(0xA4, struct drm_mode_crtc_lut)
+#define DRM_IOCTL_MODE_SETGAMMA		DRM_IOWR(0xA5, struct drm_mode_crtc_lut)
+#define DRM_IOCTL_MODE_GETENCODER	DRM_IOWR(0xA6, struct drm_mode_get_encoder)
+#define DRM_IOCTL_MODE_GETCONNECTOR	DRM_IOWR(0xA7, struct drm_mode_get_connector)
+#define DRM_IOCTL_MODE_ATTACHMODE	DRM_IOWR(0xA8, struct drm_mode_mode_cmd) /* deprecated (never worked) */
+#define DRM_IOCTL_MODE_DETACHMODE	DRM_IOWR(0xA9, struct drm_mode_mode_cmd) /* deprecated (never worked) */
+
+#define DRM_IOCTL_MODE_GETPROPERTY	DRM_IOWR(0xAA, struct drm_mode_get_property)
+#define DRM_IOCTL_MODE_SETPROPERTY	DRM_IOWR(0xAB, struct drm_mode_connector_set_property)
+#define DRM_IOCTL_MODE_GETPROPBLOB	DRM_IOWR(0xAC, struct drm_mode_get_blob)
+#define DRM_IOCTL_MODE_GETFB		DRM_IOWR(0xAD, struct drm_mode_fb_cmd)
+#define DRM_IOCTL_MODE_ADDFB		DRM_IOWR(0xAE, struct drm_mode_fb_cmd)
+/**
+ * DRM_IOCTL_MODE_RMFB - Remove a framebuffer.
+ *
+ * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL
+ * argument is a framebuffer object ID.
+ *
+ * Warning: removing a framebuffer currently in-use on an enabled plane will
+ * disable that plane. The CRTC the plane is linked to may also be disabled
+ * (depending on driver capabilities).
+ */
+#define DRM_IOCTL_MODE_RMFB		DRM_IOWR(0xAF, unsigned int)
+#define DRM_IOCTL_MODE_PAGE_FLIP	DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip)
+#define DRM_IOCTL_MODE_DIRTYFB		DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd)
+
+/**
+ * DRM_IOCTL_MODE_CREATE_DUMB - Create a new dumb buffer object.
+ *
+ * KMS dumb buffers provide a very primitive way to allocate a buffer object
+ * suitable for scanout and map it for software rendering. KMS dumb buffers are
+ * not suitable for hardware-accelerated rendering nor video decoding. KMS dumb
+ * buffers are not suitable to be displayed on any other device than the KMS
+ * device where they were allocated from. Also see
+ * :ref:`kms_dumb_buffer_objects`.
+ *
+ * The IOCTL argument is a struct drm_mode_create_dumb.
+ *
+ * User-space is expected to create a KMS dumb buffer via this IOCTL, then add
+ * it as a KMS framebuffer via &DRM_IOCTL_MODE_ADDFB and map it via
+ * &DRM_IOCTL_MODE_MAP_DUMB.
+ *
+ * &DRM_CAP_DUMB_BUFFER indicates whether this IOCTL is supported.
+ * &DRM_CAP_DUMB_PREFERRED_DEPTH and &DRM_CAP_DUMB_PREFER_SHADOW indicate
+ * driver preferences for dumb buffers.
+ */
+#define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb)
+#define DRM_IOCTL_MODE_MAP_DUMB    DRM_IOWR(0xB3, struct drm_mode_map_dumb)
+#define DRM_IOCTL_MODE_DESTROY_DUMB    DRM_IOWR(0xB4, struct drm_mode_destroy_dumb)
+#define DRM_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xB5, struct drm_mode_get_plane_res)
+#define DRM_IOCTL_MODE_GETPLANE	DRM_IOWR(0xB6, struct drm_mode_get_plane)
+#define DRM_IOCTL_MODE_SETPLANE	DRM_IOWR(0xB7, struct drm_mode_set_plane)
+#define DRM_IOCTL_MODE_ADDFB2		DRM_IOWR(0xB8, struct drm_mode_fb_cmd2)
+#define DRM_IOCTL_MODE_OBJ_GETPROPERTIES	DRM_IOWR(0xB9, struct drm_mode_obj_get_properties)
+#define DRM_IOCTL_MODE_OBJ_SETPROPERTY	DRM_IOWR(0xBA, struct drm_mode_obj_set_property)
+#define DRM_IOCTL_MODE_CURSOR2		DRM_IOWR(0xBB, struct drm_mode_cursor2)
+#define DRM_IOCTL_MODE_ATOMIC		DRM_IOWR(0xBC, struct drm_mode_atomic)
+#define DRM_IOCTL_MODE_CREATEPROPBLOB	DRM_IOWR(0xBD, struct drm_mode_create_blob)
+#define DRM_IOCTL_MODE_DESTROYPROPBLOB	DRM_IOWR(0xBE, struct drm_mode_destroy_blob)
+
+#define DRM_IOCTL_SYNCOBJ_CREATE	DRM_IOWR(0xBF, struct drm_syncobj_create)
+#define DRM_IOCTL_SYNCOBJ_DESTROY	DRM_IOWR(0xC0, struct drm_syncobj_destroy)
+#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD	DRM_IOWR(0xC1, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE	DRM_IOWR(0xC2, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_WAIT		DRM_IOWR(0xC3, struct drm_syncobj_wait)
+#define DRM_IOCTL_SYNCOBJ_RESET		DRM_IOWR(0xC4, struct drm_syncobj_array)
+#define DRM_IOCTL_SYNCOBJ_SIGNAL	DRM_IOWR(0xC5, struct drm_syncobj_array)
+
+#define DRM_IOCTL_MODE_CREATE_LEASE	DRM_IOWR(0xC6, struct drm_mode_create_lease)
+#define DRM_IOCTL_MODE_LIST_LESSEES	DRM_IOWR(0xC7, struct drm_mode_list_lessees)
+#define DRM_IOCTL_MODE_GET_LEASE	DRM_IOWR(0xC8, struct drm_mode_get_lease)
+#define DRM_IOCTL_MODE_REVOKE_LEASE	DRM_IOWR(0xC9, struct drm_mode_revoke_lease)
+
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
+#define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
+#define DRM_IOCTL_SYNCOBJ_TRANSFER	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
+
+/**
+ * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata.
+ *
+ * This queries metadata about a framebuffer. User-space fills
+ * &drm_mode_fb_cmd2.fb_id as the input, and the kernel fills the rest of the
+ * struct as the output.
+ *
+ * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles
+ * will be filled with GEM buffer handles. Fresh new GEM handles are always
+ * returned, even if another GEM handle referring to the same memory object
+ * already exists on the DRM file description. The caller is responsible for
+ * removing the new handles, e.g. via the &DRM_IOCTL_GEM_CLOSE IOCTL. The same
+ * new handle will be returned for multiple planes in case they use the same
+ * memory object. Planes are valid until one has a zero handle -- this can be
+ * used to compute the number of planes.
+ *
+ * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid
+ * until one has a zero &drm_mode_fb_cmd2.pitches.
+ *
+ * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set
+ * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the
+ * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier.
+ *
+ * To obtain DMA-BUF FDs for each plane without leaking GEM handles, user-space
+ * can export each handle via &DRM_IOCTL_PRIME_HANDLE_TO_FD, then immediately
+ * close each unique handle via &DRM_IOCTL_GEM_CLOSE, making sure to not
+ * double-close handles which are specified multiple times in the array.
+ */
+#define DRM_IOCTL_MODE_GETFB2		DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
+
+#define DRM_IOCTL_SYNCOBJ_EVENTFD	DRM_IOWR(0xCF, struct drm_syncobj_eventfd)
+
+/**
+ * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer.
+ *
+ * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL
+ * argument is a framebuffer object ID.
+ *
+ * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable
+ * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept
+ * alive. When the plane no longer uses the framebuffer (because the
+ * framebuffer is replaced with another one, or the plane is disabled), the
+ * framebuffer is cleaned up.
+ *
+ * This is useful to implement flicker-free transitions between two processes.
+ *
+ * Depending on the threat model, user-space may want to ensure that the
+ * framebuffer doesn't expose any sensitive user information: closed
+ * framebuffers attached to a plane can be read back by the next DRM master.
+ */
+#define DRM_IOCTL_MODE_CLOSEFB		DRM_IOWR(0xD0, struct drm_mode_closefb)
+
+/**
+ * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file
+ *
+ * Having a name allows for easier tracking and debugging.
+ * The length of the name (without null ending char) must be
+ * <= DRM_CLIENT_NAME_MAX_LEN.
+ * The call will fail if the name contains whitespaces or non-printable chars.
+ */
+#define DRM_IOCTL_SET_CLIENT_NAME	DRM_IOWR(0xD1, struct drm_set_client_name)
+
+/**
+ * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle
+ *
+ * Some applications (notably CRIU) need objects to have specific gem handles.
+ * This ioctl changes the object at one gem handle to use a new gem handle.
+ */
+#define DRM_IOCTL_GEM_CHANGE_HANDLE    DRM_IOWR(0xD2, struct drm_gem_change_handle)
+
+/*
+ * Device specific ioctls should only be in their respective headers
+ * The device specific ioctl range is from 0x40 to 0x9f.
+ * Generic IOCTLS restart at 0xA0.
+ *
+ * \sa drmCommandNone(), drmCommandRead(), drmCommandWrite(), and
+ * drmCommandReadWrite().
+ */
+#define DRM_COMMAND_BASE                0x40
+#define DRM_COMMAND_END			0xA0
+
+/**
+ * struct drm_event - Header for DRM events
+ * @type: event type.
+ * @length: total number of payload bytes (including header).
+ *
+ * This struct is a header for events written back to user-space on the DRM FD.
+ * A read on the DRM FD will always only return complete events: e.g. if the
+ * read buffer is 100 bytes large and there are two 64 byte events pending,
+ * only one will be returned.
+ *
+ * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and
+ * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK,
+ * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE.
+ */
+struct drm_event {
+	__u32 type;
+	__u32 length;
+};
+
+/**
+ * DRM_EVENT_VBLANK - vertical blanking event
+ *
+ * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the
+ * &_DRM_VBLANK_EVENT flag set.
+ *
+ * The event payload is a struct drm_event_vblank.
+ */
+#define DRM_EVENT_VBLANK 0x01
+/**
+ * DRM_EVENT_FLIP_COMPLETE - page-flip completion event
+ *
+ * This event is sent in response to an atomic commit or legacy page-flip with
+ * the &DRM_MODE_PAGE_FLIP_EVENT flag set.
+ *
+ * The event payload is a struct drm_event_vblank.
+ */
+#define DRM_EVENT_FLIP_COMPLETE 0x02
+/**
+ * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event
+ *
+ * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE.
+ *
+ * The event payload is a struct drm_event_crtc_sequence.
+ */
+#define DRM_EVENT_CRTC_SEQUENCE	0x03
+
+struct drm_event_vblank {	/* payload of DRM_EVENT_VBLANK and DRM_EVENT_FLIP_COMPLETE */
+	struct drm_event base;	/* common event header (type, length) */
+	__u64 user_data;
+	__u32 tv_sec;	/* event timestamp, presumably whole seconds; clock per DRM_CAP_TIMESTAMP_MONOTONIC */
+	__u32 tv_usec;	/* NOTE(review): presumably the microsecond part of the timestamp -- confirm */
+	__u32 sequence;
+	__u32 crtc_id; /* CRTC ID; 0 on older kernels that do not support this (see DRM_CAP_CRTC_IN_VBLANK_EVENT) */
+};
+
+/* Event delivered at sequence. Time stamp marks when the first pixel
+ * of the refresh cycle leaves the display engine for the display
+ */
+struct drm_event_crtc_sequence {	/* payload of DRM_EVENT_CRTC_SEQUENCE */
+	struct drm_event	base;	/* common event header (type, length) */
+	__u64			user_data;	/* NOTE(review): presumably drm_crtc_queue_sequence.user_data echoed back -- confirm */
+	__s64			time_ns;	/* NOTE(review): presumably the first-pixel time stamp, in nanoseconds -- confirm */
+	__u64			sequence;
+};
+
+/* typedef area */
+#ifndef __KERNEL__
+typedef struct drm_clip_rect drm_clip_rect_t;
+typedef struct drm_drawable_info drm_drawable_info_t;
+typedef struct drm_tex_region drm_tex_region_t;
+typedef struct drm_hw_lock drm_hw_lock_t;
+typedef struct drm_version drm_version_t;
+typedef struct drm_unique drm_unique_t;
+typedef struct drm_list drm_list_t;
+typedef struct drm_block drm_block_t;
+typedef struct drm_control drm_control_t;
+typedef enum drm_map_type drm_map_type_t;
+typedef enum drm_map_flags drm_map_flags_t;
+typedef struct drm_ctx_priv_map drm_ctx_priv_map_t;
+typedef struct drm_map drm_map_t;
+typedef struct drm_client drm_client_t;
+typedef enum drm_stat_type drm_stat_type_t;
+typedef struct drm_stats drm_stats_t;
+typedef enum drm_lock_flags drm_lock_flags_t;
+typedef struct drm_lock drm_lock_t;
+typedef enum drm_dma_flags drm_dma_flags_t;
+typedef struct drm_buf_desc drm_buf_desc_t;
+typedef struct drm_buf_info drm_buf_info_t;
+typedef struct drm_buf_free drm_buf_free_t;
+typedef struct drm_buf_pub drm_buf_pub_t;
+typedef struct drm_buf_map drm_buf_map_t;
+typedef struct drm_dma drm_dma_t;
+typedef union drm_wait_vblank drm_wait_vblank_t;
+typedef struct drm_agp_mode drm_agp_mode_t;
+typedef enum drm_ctx_flags drm_ctx_flags_t;
+typedef struct drm_ctx drm_ctx_t;
+typedef struct drm_ctx_res drm_ctx_res_t;
+typedef struct drm_draw drm_draw_t;
+typedef struct drm_update_draw drm_update_draw_t;
+typedef struct drm_auth drm_auth_t;
+typedef struct drm_irq_busid drm_irq_busid_t;
+typedef enum drm_vblank_seq_type drm_vblank_seq_type_t;
+
+typedef struct drm_agp_buffer drm_agp_buffer_t;
+typedef struct drm_agp_binding drm_agp_binding_t;
+typedef struct drm_agp_info drm_agp_info_t;
+typedef struct drm_scatter_gather drm_scatter_gather_t;
+typedef struct drm_set_version drm_set_version_t;
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/plugins/amdgpu/drm_mode.h b/plugins/amdgpu/drm_mode.h
new file mode 100644
index 000000000..c082810c0
--- /dev/null
+++ b/plugins/amdgpu/drm_mode.h
@@ -0,0 +1,1362 @@
+/*
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (c) 2007 Jakob Bornecrantz <wallbraker@gmail.com>
+ * Copyright (c) 2008 Red Hat Inc.
+ * Copyright (c) 2007-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA
+ * Copyright (c) 2007-2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _DRM_MODE_H
+#define _DRM_MODE_H
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/**
+ * DOC: overview
+ *
+ * DRM exposes many UAPI and structure definitions to have a consistent
+ * and standardized interface with users.
+ * Userspace can refer to these structure definitions and UAPI formats
+ * to communicate to drivers.
+ */
+
+#define DRM_CONNECTOR_NAME_LEN	32
+#define DRM_DISPLAY_MODE_LEN	32
+#define DRM_PROP_NAME_LEN	32
+
+#define DRM_MODE_TYPE_BUILTIN	(1<<0) /* deprecated */
+#define DRM_MODE_TYPE_CLOCK_C	((1<<1) | DRM_MODE_TYPE_BUILTIN) /* deprecated */
+#define DRM_MODE_TYPE_CRTC_C	((1<<2) | DRM_MODE_TYPE_BUILTIN) /* deprecated */
+#define DRM_MODE_TYPE_PREFERRED	(1<<3)
+#define DRM_MODE_TYPE_DEFAULT	(1<<4) /* deprecated */
+#define DRM_MODE_TYPE_USERDEF	(1<<5)
+#define DRM_MODE_TYPE_DRIVER	(1<<6)
+
+#define DRM_MODE_TYPE_ALL	(DRM_MODE_TYPE_PREFERRED |	\
+				 DRM_MODE_TYPE_USERDEF |	\
+				 DRM_MODE_TYPE_DRIVER)
+
+/* Video mode flags */
+/* bit compatible with the xrandr RR_ definitions (bits 0-13)
+ *
+ * ABI warning: Existing userspace really expects
+ * the mode flags to match the xrandr definitions. Any
+ * changes that don't match the xrandr definitions will
+ * likely need a new client cap or some other mechanism
+ * to avoid breaking existing userspace. This includes
+ * allocating new flags in the previously unused bits!
+ */
+#define DRM_MODE_FLAG_PHSYNC			(1<<0)
+#define DRM_MODE_FLAG_NHSYNC			(1<<1)
+#define DRM_MODE_FLAG_PVSYNC			(1<<2)
+#define DRM_MODE_FLAG_NVSYNC			(1<<3)
+#define DRM_MODE_FLAG_INTERLACE			(1<<4)
+#define DRM_MODE_FLAG_DBLSCAN			(1<<5)
+#define DRM_MODE_FLAG_CSYNC			(1<<6)
+#define DRM_MODE_FLAG_PCSYNC			(1<<7)
+#define DRM_MODE_FLAG_NCSYNC			(1<<8)
+#define DRM_MODE_FLAG_HSKEW			(1<<9) /* hskew provided */
+#define DRM_MODE_FLAG_BCAST			(1<<10) /* deprecated */
+#define DRM_MODE_FLAG_PIXMUX			(1<<11) /* deprecated */
+#define DRM_MODE_FLAG_DBLCLK			(1<<12)
+#define DRM_MODE_FLAG_CLKDIV2			(1<<13)
+ /*
+  * When adding a new stereo mode don't forget to adjust DRM_MODE_FLAGS_3D_MAX
+  * (define not exposed to user space).
+  */
+#define DRM_MODE_FLAG_3D_MASK			(0x1f<<14)
+#define  DRM_MODE_FLAG_3D_NONE		(0<<14)
+#define  DRM_MODE_FLAG_3D_FRAME_PACKING		(1<<14)
+#define  DRM_MODE_FLAG_3D_FIELD_ALTERNATIVE	(2<<14)
+#define  DRM_MODE_FLAG_3D_LINE_ALTERNATIVE	(3<<14)
+#define  DRM_MODE_FLAG_3D_SIDE_BY_SIDE_FULL	(4<<14)
+#define  DRM_MODE_FLAG_3D_L_DEPTH		(5<<14)
+#define  DRM_MODE_FLAG_3D_L_DEPTH_GFX_GFX_DEPTH	(6<<14)
+#define  DRM_MODE_FLAG_3D_TOP_AND_BOTTOM	(7<<14)
+#define  DRM_MODE_FLAG_3D_SIDE_BY_SIDE_HALF	(8<<14)
+
+/* Picture aspect ratio options */
+#define DRM_MODE_PICTURE_ASPECT_NONE		0
+#define DRM_MODE_PICTURE_ASPECT_4_3		1
+#define DRM_MODE_PICTURE_ASPECT_16_9		2
+#define DRM_MODE_PICTURE_ASPECT_64_27		3
+#define DRM_MODE_PICTURE_ASPECT_256_135		4
+
+/* Content type options */
+#define DRM_MODE_CONTENT_TYPE_NO_DATA		0
+#define DRM_MODE_CONTENT_TYPE_GRAPHICS		1
+#define DRM_MODE_CONTENT_TYPE_PHOTO		2
+#define DRM_MODE_CONTENT_TYPE_CINEMA		3
+#define DRM_MODE_CONTENT_TYPE_GAME		4
+
+/* Aspect ratio flag bitmask (4 bits 22:19) */
+#define DRM_MODE_FLAG_PIC_AR_MASK		(0x0F<<19)
+#define  DRM_MODE_FLAG_PIC_AR_NONE \
+			(DRM_MODE_PICTURE_ASPECT_NONE<<19)
+#define  DRM_MODE_FLAG_PIC_AR_4_3 \
+			(DRM_MODE_PICTURE_ASPECT_4_3<<19)
+#define  DRM_MODE_FLAG_PIC_AR_16_9 \
+			(DRM_MODE_PICTURE_ASPECT_16_9<<19)
+#define  DRM_MODE_FLAG_PIC_AR_64_27 \
+			(DRM_MODE_PICTURE_ASPECT_64_27<<19)
+#define  DRM_MODE_FLAG_PIC_AR_256_135 \
+			(DRM_MODE_PICTURE_ASPECT_256_135<<19)
+
+#define  DRM_MODE_FLAG_ALL	(DRM_MODE_FLAG_PHSYNC |		\
+				 DRM_MODE_FLAG_NHSYNC |		\
+				 DRM_MODE_FLAG_PVSYNC |		\
+				 DRM_MODE_FLAG_NVSYNC |		\
+				 DRM_MODE_FLAG_INTERLACE |	\
+				 DRM_MODE_FLAG_DBLSCAN |	\
+				 DRM_MODE_FLAG_CSYNC |		\
+				 DRM_MODE_FLAG_PCSYNC |		\
+				 DRM_MODE_FLAG_NCSYNC |		\
+				 DRM_MODE_FLAG_HSKEW |		\
+				 DRM_MODE_FLAG_DBLCLK |		\
+				 DRM_MODE_FLAG_CLKDIV2 |	\
+				 DRM_MODE_FLAG_3D_MASK)
+
+/* DPMS flags */
+/* bit compatible with the xorg definitions. */
+#define DRM_MODE_DPMS_ON	0
+#define DRM_MODE_DPMS_STANDBY	1
+#define DRM_MODE_DPMS_SUSPEND	2
+#define DRM_MODE_DPMS_OFF	3
+
+/* Scaling mode options */
+#define DRM_MODE_SCALE_NONE		0 /* Unmodified timing (display or
+					     software can still scale) */
+#define DRM_MODE_SCALE_FULLSCREEN	1 /* Full screen, ignore aspect */
+#define DRM_MODE_SCALE_CENTER		2 /* Centered, no scaling */
+#define DRM_MODE_SCALE_ASPECT		3 /* Full screen, preserve aspect */
+
+/* Dithering mode options */
+#define DRM_MODE_DITHERING_OFF	0
+#define DRM_MODE_DITHERING_ON	1
+#define DRM_MODE_DITHERING_AUTO 2
+
+/* Dirty info options */
+#define DRM_MODE_DIRTY_OFF      0
+#define DRM_MODE_DIRTY_ON       1
+#define DRM_MODE_DIRTY_ANNOTATE 2
+
+/* Link Status options */
+#define DRM_MODE_LINK_STATUS_GOOD	0
+#define DRM_MODE_LINK_STATUS_BAD	1
+
+/*
+ * DRM_MODE_ROTATE_<degrees>
+ *
+ * Signals that a drm plane has been rotated <degrees> degrees in the
+ * counter-clockwise direction.
+ *
+ * This define is provided as a convenience, looking up the property id
+ * using the name->prop id lookup is the preferred method.
+ */
+#define DRM_MODE_ROTATE_0       (1<<0)
+#define DRM_MODE_ROTATE_90      (1<<1)
+#define DRM_MODE_ROTATE_180     (1<<2)
+#define DRM_MODE_ROTATE_270     (1<<3)
+
+/*
+ * DRM_MODE_ROTATE_MASK
+ *
+ * Bitmask used to look for drm plane rotations.
+ */
+#define DRM_MODE_ROTATE_MASK (\
+		DRM_MODE_ROTATE_0  | \
+		DRM_MODE_ROTATE_90  | \
+		DRM_MODE_ROTATE_180 | \
+		DRM_MODE_ROTATE_270)
+
+/*
+ * DRM_MODE_REFLECT_<axis>
+ *
+ * Signals that the contents of a drm plane are reflected along the <axis> axis,
+ * in the same way as mirroring.
+ * See kerneldoc chapter "Plane Composition Properties" for more details.
+ *
+ * This define is provided as a convenience, looking up the property id
+ * using the name->prop id lookup is the preferred method.
+ */
+#define DRM_MODE_REFLECT_X      (1<<4)
+#define DRM_MODE_REFLECT_Y      (1<<5)
+
+/*
+ * DRM_MODE_REFLECT_MASK
+ *
+ * Bitmask used to look for drm plane reflections.
+ */
+#define DRM_MODE_REFLECT_MASK (\
+		DRM_MODE_REFLECT_X | \
+		DRM_MODE_REFLECT_Y)
+
+/* Content Protection Flags */
+#define DRM_MODE_CONTENT_PROTECTION_UNDESIRED	0
+#define DRM_MODE_CONTENT_PROTECTION_DESIRED     1
+#define DRM_MODE_CONTENT_PROTECTION_ENABLED     2
+
+/**
+ * struct drm_mode_modeinfo - Display mode information.
+ * @clock: pixel clock in kHz
+ * @hdisplay: horizontal display size
+ * @hsync_start: horizontal sync start
+ * @hsync_end: horizontal sync end
+ * @htotal: horizontal total size
+ * @hskew: horizontal skew (see also DRM_MODE_FLAG_HSKEW, "hskew provided")
+ * @vdisplay: vertical display size
+ * @vsync_start: vertical sync start
+ * @vsync_end: vertical sync end
+ * @vtotal: vertical total size
+ * @vscan: vertical scan
+ * @vrefresh: approximate vertical refresh rate in Hz
+ * @flags: bitmask of misc. flags, see DRM_MODE_FLAG_* defines
+ * @type: bitmask of type flags, see DRM_MODE_TYPE_* defines
+ * @name: string describing the mode resolution (DRM_DISPLAY_MODE_LEN bytes)
+ *
+ * This is the user-space API display mode information structure. For the
+ * kernel version see struct drm_display_mode.
+ */
+struct drm_mode_modeinfo {
+	__u32 clock;
+	__u16 hdisplay;
+	__u16 hsync_start;
+	__u16 hsync_end;
+	__u16 htotal;
+	__u16 hskew;
+	__u16 vdisplay;
+	__u16 vsync_start;
+	__u16 vsync_end;
+	__u16 vtotal;
+	__u16 vscan;
+
+	__u32 vrefresh;
+
+	__u32 flags; /* DRM_MODE_FLAG_* */
+	__u32 type; /* DRM_MODE_TYPE_* */
+	char name[DRM_DISPLAY_MODE_LEN]; /* DRM_DISPLAY_MODE_LEN is 32 */
+};
+
+struct drm_mode_card_res { /* NOTE(review): undocumented here; field notes below are inferred from names - confirm */
+	__u64 fb_id_ptr; /* presumably user pointer to an array of framebuffer IDs */
+	__u64 crtc_id_ptr; /* presumably user pointer to an array of CRTC IDs */
+	__u64 connector_id_ptr; /* presumably user pointer to an array of connector IDs */
+	__u64 encoder_id_ptr; /* presumably user pointer to an array of encoder IDs */
+	__u32 count_fbs; /* presumably number of entries at fb_id_ptr */
+	__u32 count_crtcs; /* presumably number of entries at crtc_id_ptr */
+	__u32 count_connectors; /* presumably number of entries at connector_id_ptr */
+	__u32 count_encoders; /* presumably number of entries at encoder_id_ptr */
+	__u32 min_width; /* min/max width and height: presumably size limits; units not stated here */
+	__u32 max_width;
+	__u32 min_height;
+	__u32 max_height;
+};
+
+struct drm_mode_crtc { /* CRTC state: connectors, framebuffer, position and mode */
+	__u64 set_connectors_ptr; /* presumably user pointer to an array of connector IDs - confirm */
+	__u32 count_connectors; /* presumably number of entries at set_connectors_ptr - confirm */
+
+	__u32 crtc_id; /**< Id */
+	__u32 fb_id; /**< Id of framebuffer */
+
+	__u32 x; /**< x Position on the framebuffer */
+	__u32 y; /**< y Position on the framebuffer */
+
+	__u32 gamma_size; /* NOTE(review): meaning not documented here */
+	__u32 mode_valid; /* presumably non-zero when mode below is meaningful - confirm */
+	struct drm_mode_modeinfo mode; /* display timings, see struct drm_mode_modeinfo */
+};
+
+#define DRM_MODE_PRESENT_TOP_FIELD	(1<<0) /* flag bits referred to by drm_mode_set_plane.flags below */
+#define DRM_MODE_PRESENT_BOTTOM_FIELD	(1<<1)
+
+/* Planes blend with or override other bits on the CRTC */
+struct drm_mode_set_plane {
+	__u32 plane_id;
+	__u32 crtc_id;
+	__u32 fb_id; /* fb object contains surface format type */
+	__u32 flags; /* see the DRM_MODE_PRESENT_* flags defined just above */
+
+	/* Signed dest location allows it to be partially off screen */
+	__s32 crtc_x;
+	__s32 crtc_y;
+	__u32 crtc_w;
+	__u32 crtc_h;
+
+	/* Source values are 16.16 fixed point */
+	__u32 src_x;
+	__u32 src_y;
+	__u32 src_h; /* note: height is declared before width in this struct */
+	__u32 src_w;
+};
+
+/**
+ * struct drm_mode_get_plane - Get plane metadata.
+ *
+ * Userspace can perform a GETPLANE ioctl to retrieve information about a
+ * plane.
+ *
+ * To retrieve the number of formats supported, set @count_format_types to zero
+ * and call the ioctl. @count_format_types will be updated with the value.
+ *
+ * To retrieve these formats, allocate an array with the memory needed to store
+ * @count_format_types formats. Point @format_type_ptr to this array and call
+ * the ioctl again (with @count_format_types still set to the value returned in
+ * the first ioctl call).
+ */
+struct drm_mode_get_plane {
+	/**
+	 * @plane_id: Object ID of the plane whose information should be
+	 * retrieved. Set by caller.
+	 */
+	__u32 plane_id;
+
+	/** @crtc_id: Object ID of the current CRTC. */
+	__u32 crtc_id;
+	/** @fb_id: Object ID of the current fb. */
+	__u32 fb_id;
+
+	/**
+	 * @possible_crtcs: Bitmask of CRTCs compatible with the plane. CRTCs
+	 * are created and they receive an index, which corresponds to their
+	 * position in the bitmask. Bit N corresponds to
+	 * :ref:`CRTC index<crtc_index>` N.
+	 */
+	__u32 possible_crtcs;
+	/** @gamma_size: Never used. */
+	__u32 gamma_size;
+
+	/** @count_format_types: Number of formats. */
+	__u32 count_format_types;
+	/**
+	 * @format_type_ptr: Pointer to ``__u32`` array of formats that are
+	 * supported by the plane. These formats do not require modifiers.
+	 */
+	__u64 format_type_ptr;
+};
+
+struct drm_mode_get_plane_res { /* NOTE(review): undocumented here; notes below inferred from names - confirm */
+	__u64 plane_id_ptr; /* presumably user pointer to an array of plane object IDs */
+	__u32 count_planes; /* presumably number of entries at plane_id_ptr */
+};
+
+#define DRM_MODE_ENCODER_NONE	0 /* encoder type codes (plain integers, not bit flags) */
+#define DRM_MODE_ENCODER_DAC	1
+#define DRM_MODE_ENCODER_TMDS	2
+#define DRM_MODE_ENCODER_LVDS	3
+#define DRM_MODE_ENCODER_TVDAC	4
+#define DRM_MODE_ENCODER_VIRTUAL 5
+#define DRM_MODE_ENCODER_DSI	6
+#define DRM_MODE_ENCODER_DPMST	7
+#define DRM_MODE_ENCODER_DPI	8
+
+struct drm_mode_get_encoder {
+	__u32 encoder_id;
+	__u32 encoder_type; /* presumably one of DRM_MODE_ENCODER_* above - confirm */
+
+	__u32 crtc_id; /**< Id of crtc */
+
+	__u32 possible_crtcs; /* presumably a CRTC-index bitmask as in drm_mode_get_plane - confirm */
+	__u32 possible_clones; /* NOTE(review): meaning not documented here */
+};
+
+/* This is for connectors with multiple signal types (trailing columns appear to list the connector kinds using each value). */
+/* Try to match DRM_MODE_CONNECTOR_X as closely as possible; Automatic and Unknown both map to 0. */
+enum drm_mode_subconnector {
+	DRM_MODE_SUBCONNECTOR_Automatic   = 0,  /* DVI-I, TV     */
+	DRM_MODE_SUBCONNECTOR_Unknown     = 0,  /* DVI-I, TV, DP */
+	DRM_MODE_SUBCONNECTOR_VGA	  = 1,  /*            DP */
+	DRM_MODE_SUBCONNECTOR_DVID	  = 3,  /* DVI-I      DP */
+	DRM_MODE_SUBCONNECTOR_DVIA	  = 4,  /* DVI-I         */
+	DRM_MODE_SUBCONNECTOR_Composite   = 5,  /*        TV     */
+	DRM_MODE_SUBCONNECTOR_SVIDEO	  = 6,  /*        TV     */
+	DRM_MODE_SUBCONNECTOR_Component   = 8,  /*        TV     */
+	DRM_MODE_SUBCONNECTOR_SCART	  = 9,  /*        TV     */
+	DRM_MODE_SUBCONNECTOR_DisplayPort = 10, /*            DP */
+	DRM_MODE_SUBCONNECTOR_HDMIA       = 11, /*            DP */
+	DRM_MODE_SUBCONNECTOR_Native      = 15, /*            DP */
+	DRM_MODE_SUBCONNECTOR_Wireless    = 18, /*            DP */
+};
+
+#define DRM_MODE_CONNECTOR_Unknown	0 /* values for drm_mode_get_connector.connector_type */
+#define DRM_MODE_CONNECTOR_VGA		1
+#define DRM_MODE_CONNECTOR_DVII		2
+#define DRM_MODE_CONNECTOR_DVID		3
+#define DRM_MODE_CONNECTOR_DVIA		4
+#define DRM_MODE_CONNECTOR_Composite	5
+#define DRM_MODE_CONNECTOR_SVIDEO	6
+#define DRM_MODE_CONNECTOR_LVDS		7
+#define DRM_MODE_CONNECTOR_Component	8
+#define DRM_MODE_CONNECTOR_9PinDIN	9
+#define DRM_MODE_CONNECTOR_DisplayPort	10
+#define DRM_MODE_CONNECTOR_HDMIA	11
+#define DRM_MODE_CONNECTOR_HDMIB	12
+#define DRM_MODE_CONNECTOR_TV		13
+#define DRM_MODE_CONNECTOR_eDP		14
+#define DRM_MODE_CONNECTOR_VIRTUAL      15
+#define DRM_MODE_CONNECTOR_DSI		16
+#define DRM_MODE_CONNECTOR_DPI		17
+#define DRM_MODE_CONNECTOR_WRITEBACK	18
+#define DRM_MODE_CONNECTOR_SPI		19
+#define DRM_MODE_CONNECTOR_USB		20
+
+/**
+ * struct drm_mode_get_connector - Get connector metadata.
+ *
+ * User-space can perform a GETCONNECTOR ioctl to retrieve information about a
+ * connector. User-space is expected to retrieve encoders, modes and properties
+ * by performing this ioctl at least twice: the first time to retrieve the
+ * number of elements, the second time to retrieve the elements themselves.
+ *
+ * To retrieve the number of elements, set @count_props and @count_encoders to
+ * zero, set @count_modes to 1 (zero may trigger a force-probe, see below), and
+ * set @modes_ptr to a temporary struct drm_mode_modeinfo element.
+ *
+ * To retrieve the elements, allocate arrays for @encoders_ptr, @modes_ptr,
+ * @props_ptr and @prop_values_ptr, then set @count_modes, @count_props and
+ * @count_encoders to their capacity.
+ *
+ * Performing the ioctl only twice may be racy: the number of elements may have
+ * changed with a hotplug event in-between the two ioctls. User-space is
+ * expected to retry the last ioctl until the number of elements stabilizes.
+ * The kernel won't fill any array which doesn't have the expected length.
+ *
+ * **Force-probing a connector**
+ *
+ * If the @count_modes field is set to zero and the DRM client is the current
+ * DRM master, the kernel will perform a forced probe on the connector to
+ * refresh the connector status, modes and EDID. A forced-probe can be slow,
+ * might cause flickering and the ioctl will block.
+ *
+ * User-space needs to force-probe connectors to ensure their metadata is
+ * up-to-date at startup and after receiving a hot-plug event. User-space
+ * may perform a forced-probe when the user explicitly requests it. User-space
+ * shouldn't perform a forced-probe in other situations.
+ */
+struct drm_mode_get_connector {
+	/** @encoders_ptr: Pointer to ``__u32`` array of object IDs. */
+	__u64 encoders_ptr;
+	/** @modes_ptr: Pointer to struct drm_mode_modeinfo array. */
+	__u64 modes_ptr;
+	/** @props_ptr: Pointer to ``__u32`` array of property IDs. */
+	__u64 props_ptr;
+	/** @prop_values_ptr: Pointer to ``__u64`` array of property values. */
+	__u64 prop_values_ptr;
+
+	/** @count_modes: Number of modes. */
+	__u32 count_modes;
+	/** @count_props: Number of properties. */
+	__u32 count_props;
+	/** @count_encoders: Number of encoders. */
+	__u32 count_encoders;
+
+	/** @encoder_id: Object ID of the current encoder. */
+	__u32 encoder_id;
+	/** @connector_id: Object ID of the connector. */
+	__u32 connector_id;
+	/**
+	 * @connector_type: Type of the connector.
+	 *
+	 * See DRM_MODE_CONNECTOR_* defines.
+	 */
+	__u32 connector_type;
+	/**
+	 * @connector_type_id: Type-specific connector number.
+	 *
+	 * This is not an object ID. This is a per-type connector number. Each
+	 * (type, type_id) combination is unique across all connectors of a DRM
+	 * device.
+	 *
+	 * The (type, type_id) combination is not a stable identifier: the
+	 * type_id can change depending on the driver probe order.
+	 */
+	__u32 connector_type_id;
+
+	/**
+	 * @connection: Status of the connector.
+	 *
+	 * See enum drm_connector_status.
+	 */
+	__u32 connection;
+	/** @mm_width: Width of the connected sink in millimeters. */
+	__u32 mm_width;
+	/** @mm_height: Height of the connected sink in millimeters. */
+	__u32 mm_height;
+	/**
+	 * @subpixel: Subpixel order of the connected sink.
+	 *
+	 * See enum subpixel_order.
+	 */
+	__u32 subpixel;
+
+	/** @pad: Padding, must be zero. */
+	__u32 pad;
+};
+
+#define DRM_MODE_PROP_PENDING	(1<<0) /* deprecated, do not use */
+#define DRM_MODE_PROP_RANGE	(1<<1)
+#define DRM_MODE_PROP_IMMUTABLE	(1<<2)
+#define DRM_MODE_PROP_ENUM	(1<<3) /* enumerated type with text strings */
+#define DRM_MODE_PROP_BLOB	(1<<4)
+#define DRM_MODE_PROP_BITMASK	(1<<5) /* bitmask of enumerated types */
+
+/* non-extended types: legacy bitmask, one bit per type: */
+#define DRM_MODE_PROP_LEGACY_TYPE  ( \
+		DRM_MODE_PROP_RANGE | \
+		DRM_MODE_PROP_ENUM | \
+		DRM_MODE_PROP_BLOB | \
+		DRM_MODE_PROP_BITMASK)
+
+/* extended-types: rather than continue to consume a bit per type,
+ * grab a chunk of the bits to use as integer type id.
+ */
+#define DRM_MODE_PROP_EXTENDED_TYPE	0x0000ffc0
+#define DRM_MODE_PROP_TYPE(n)		((n) << 6)
+#define DRM_MODE_PROP_OBJECT		DRM_MODE_PROP_TYPE(1)
+#define DRM_MODE_PROP_SIGNED_RANGE	DRM_MODE_PROP_TYPE(2)
+
+/* the PROP_ATOMIC flag is used to hide properties from userspace that
+ * is not aware of atomic properties.  This is mostly to work around
+ * older userspace (DDX drivers) that read/write each prop they find,
+ * without being aware that this could be triggering a lengthy modeset.
+ */
+#define DRM_MODE_PROP_ATOMIC        0x80000000
+
+/**
+ * struct drm_mode_property_enum - Description for an enum/bitfield entry.
+ * @value: numeric value for this enum entry.
+ * @name: symbolic name for this enum entry (DRM_PROP_NAME_LEN bytes).
+ *
+ * See struct drm_property_enum for details.
+ */
+struct drm_mode_property_enum {
+	__u64 value;
+	char name[DRM_PROP_NAME_LEN]; /* DRM_PROP_NAME_LEN is 32 */
+};
+
+/**
+ * struct drm_mode_get_property - Get property metadata.
+ *
+ * User-space can perform a GETPROPERTY ioctl to retrieve information about a
+ * property. The same property may be attached to multiple objects, see
+ * "Modeset Base Object Abstraction".
+ *
+ * The meaning of the @values_ptr field changes depending on the property type.
+ * See &drm_property.flags for more details.
+ *
+ * The @enum_blob_ptr and @count_enum_blobs fields are only meaningful when the
+ * property has the type &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK. For
+ * backwards compatibility, the kernel will always set @count_enum_blobs to
+ * zero when the property has the type &DRM_MODE_PROP_BLOB. User-space must
+ * ignore these two fields if the property has a different type.
+ *
+ * User-space is expected to retrieve values and enums by performing this ioctl
+ * at least twice: the first time to retrieve the number of elements, the
+ * second time to retrieve the elements themselves.
+ *
+ * To retrieve the number of elements, set @count_values and @count_enum_blobs
+ * to zero, then call the ioctl. @count_values will be updated with the number
+ * of elements. If the property has the type &DRM_MODE_PROP_ENUM or
+ * &DRM_MODE_PROP_BITMASK, @count_enum_blobs will be updated as well.
+ *
+ * To retrieve the elements themselves, allocate an array for @values_ptr and
+ * set @count_values to its capacity. If the property has the type
+ * &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK, allocate an array for
+ * @enum_blob_ptr and set @count_enum_blobs to its capacity. Calling the ioctl
+ * again will fill the arrays.
+ */
+struct drm_mode_get_property {
+	/** @values_ptr: Pointer to a ``__u64`` array. */
+	__u64 values_ptr;
+	/** @enum_blob_ptr: Pointer to a struct drm_mode_property_enum array. */
+	__u64 enum_blob_ptr;
+
+	/**
+	 * @prop_id: Object ID of the property which should be retrieved. Set
+	 * by the caller.
+	 */
+	__u32 prop_id;
+	/**
+	 * @flags: ``DRM_MODE_PROP_*`` bitfield. See &drm_property.flags for
+	 * a definition of the flags.
+	 */
+	__u32 flags;
+	/**
+	 * @name: Symbolic property name. User-space should use this field to
+	 * recognize properties.
+	 */
+	char name[DRM_PROP_NAME_LEN]; /* DRM_PROP_NAME_LEN is 32 */
+
+	/** @count_values: Number of elements in @values_ptr. */
+	__u32 count_values;
+	/** @count_enum_blobs: Number of elements in @enum_blob_ptr. */
+	__u32 count_enum_blobs;
+};
+
+struct drm_mode_connector_set_property { /* NOTE(review): undocumented here; notes below inferred from names - confirm */
+	__u64 value; /* presumably the new property value */
+	__u32 prop_id; /* presumably object ID of the property to set */
+	__u32 connector_id; /* presumably object ID of the target connector */
+};
+
+#define DRM_MODE_OBJECT_CRTC 0xcccccccc /* object type tags: distinctive 32-bit patterns, not bit flags */
+#define DRM_MODE_OBJECT_CONNECTOR 0xc0c0c0c0
+#define DRM_MODE_OBJECT_ENCODER 0xe0e0e0e0
+#define DRM_MODE_OBJECT_MODE 0xdededede
+#define DRM_MODE_OBJECT_PROPERTY 0xb0b0b0b0
+#define DRM_MODE_OBJECT_FB 0xfbfbfbfb
+#define DRM_MODE_OBJECT_BLOB 0xbbbbbbbb
+#define DRM_MODE_OBJECT_PLANE 0xeeeeeeee
+#define DRM_MODE_OBJECT_ANY 0 /* presumably a wildcard matching any object type - confirm */
+
+struct drm_mode_obj_get_properties {
+	__u64 props_ptr; /* presumably pointer to a __u32 array of property IDs, as in drm_mode_get_connector - confirm */
+	__u64 prop_values_ptr; /* presumably pointer to a __u64 array of property values - confirm */
+	__u32 count_props; /* presumably number of entries in both arrays - confirm */
+	__u32 obj_id; /* presumably object ID of the object being queried */
+	__u32 obj_type; /* presumably one of DRM_MODE_OBJECT_* above - confirm */
+};
+
+struct drm_mode_obj_set_property { /* NOTE(review): undocumented here; notes below inferred from names - confirm */
+	__u64 value; /* presumably the new property value */
+	__u32 prop_id; /* presumably object ID of the property to set */
+	__u32 obj_id; /* presumably object ID of the target object */
+	__u32 obj_type; /* presumably one of the DRM_MODE_OBJECT_* defines - confirm */
+};
+
+struct drm_mode_get_blob { /* NOTE(review): undocumented here; notes below inferred from names - confirm */
+	__u32 blob_id; /* presumably object ID of the blob to read */
+	__u32 length; /* presumably size in bytes of the buffer at data */
+	__u64 data; /* presumably a user pointer carried as a 64-bit integer */
+};
+
+struct drm_mode_fb_cmd { /* single-handle framebuffer description; compare struct drm_mode_fb_cmd2 */
+	__u32 fb_id;
+	__u32 width;
+	__u32 height;
+	__u32 pitch; /* presumably stride in bytes, as "pitches" in drm_mode_fb_cmd2 - confirm */
+	__u32 bpp; /* presumably bits per pixel - confirm */
+	__u32 depth; /* presumably colour depth in bits - confirm */
+	/* driver specific handle */
+	__u32 handle;
+};
+
+#define DRM_MODE_FB_INTERLACED	(1<<0) /* for interlaced framebuffers */
+#define DRM_MODE_FB_MODIFIERS	(1<<1) /* enables ->modifier[] */
+
+/**
+ * struct drm_mode_fb_cmd2 - Frame-buffer metadata.
+ *
+ * This struct holds frame-buffer metadata. There are two ways to use it:
+ *
+ * - User-space can fill this struct and perform a &DRM_IOCTL_MODE_ADDFB2
+ *   ioctl to register a new frame-buffer. The new frame-buffer object ID will
+ *   be set by the kernel in @fb_id.
+ * - User-space can set @fb_id and perform a &DRM_IOCTL_MODE_GETFB2 ioctl to
+ *   fetch metadata about an existing frame-buffer.
+ *
+ * In case of planar formats, this struct allows up to 4 buffer objects with
+ * offsets and pitches per plane. The pitch and offset order are dictated by
+ * the format FourCC as defined by ``drm_fourcc.h``, e.g. NV12 is described as:
+ *
+ *     YUV 4:2:0 image with a plane of 8-bit Y samples followed by an
+ *     interleaved U/V plane containing 8-bit 2x2 subsampled colour difference
+ *     samples.
+ *
+ * So it would consist of a Y plane at ``offsets[0]`` and a UV plane at
+ * ``offsets[1]``.
+ *
+ * To accommodate tiled, compressed, etc formats, a modifier can be specified.
+ * For more information see the "Format Modifiers" section. Note that even
+ * though it looks like we have a modifier per-plane, we in fact do not. The
+ * modifier for each plane must be identical. Thus all combinations of
+ * different data layouts for multi-plane formats must be enumerated as
+ * separate modifiers.
+ *
+ * All of the entries in @handles, @pitches, @offsets and @modifier must be
+ * zero when unused. Warning, for @offsets and @modifier zero can't be used to
+ * figure out whether the entry is used or not since it's a valid value (a zero
+ * offset is common, and a zero modifier is &DRM_FORMAT_MOD_LINEAR).
+ */
+struct drm_mode_fb_cmd2 {
+	/** @fb_id: Object ID of the frame-buffer. */
+	__u32 fb_id;
+	/** @width: Width of the frame-buffer. */
+	__u32 width;
+	/** @height: Height of the frame-buffer. */
+	__u32 height;
+	/**
+	 * @pixel_format: FourCC format code, see ``DRM_FORMAT_*`` constants in
+	 * ``drm_fourcc.h``.
+	 */
+	__u32 pixel_format;
+	/**
+	 * @flags: Frame-buffer flags (see &DRM_MODE_FB_INTERLACED and
+	 * &DRM_MODE_FB_MODIFIERS).
+	 */
+	__u32 flags;
+
+	/**
+	 * @handles: GEM buffer handle, one per plane. Set to 0 if the plane is
+	 * unused. The same handle can be used for multiple planes.
+	 */
+	__u32 handles[4]; /* indexed by plane, up to 4 planes */
+	/** @pitches: Pitch (aka. stride) in bytes, one per plane. */
+	__u32 pitches[4]; /* indexed by plane */
+	/** @offsets: Offset into the buffer in bytes, one per plane. */
+	__u32 offsets[4]; /* indexed by plane; zero is a valid offset */
+	/**
+	 * @modifier: Format modifier, one per plane. See ``DRM_FORMAT_MOD_*``
+	 * constants in ``drm_fourcc.h``. All planes must use the same
+	 * modifier. Ignored unless &DRM_MODE_FB_MODIFIERS is set in @flags.
+	 */
+	__u64 modifier[4]; /* indexed by plane; used entries must be identical */
+};
+
+#define DRM_MODE_FB_DIRTY_ANNOTATE_COPY 0x01
+#define DRM_MODE_FB_DIRTY_ANNOTATE_FILL 0x02
+#define DRM_MODE_FB_DIRTY_FLAGS         0x03 /* both annotate bits combined (0x01 | 0x02) */
+
+#define DRM_MODE_FB_DIRTY_MAX_CLIPS     256
+
+/*
+ * Mark a region of a framebuffer as dirty.
+ *
+ * Some hardware does not automatically update display contents
+ * when hardware or software draws to a framebuffer. This ioctl
+ * allows userspace to tell the kernel and the hardware what
+ * regions of the framebuffer have changed.
+ *
+ * The kernel or hardware is free to update more than just the
+ * region specified by the clip rects. The kernel or hardware
+ * may also delay and/or coalesce several calls to dirty into a
+ * single update.
+ *
+ * Userspace may annotate the updates; the annotations are a
+ * promise made by the caller that the change is either a copy
+ * of pixels or a fill of a single color in the region specified.
+ *
+ * If the DRM_MODE_FB_DIRTY_ANNOTATE_COPY flag is given then
+ * the number of updated regions is half of num_clips given,
+ * where the clip rects are paired in src and dst. The width and
+ * height of each one of the pairs must match.
+ *
+ * If the DRM_MODE_FB_DIRTY_ANNOTATE_FILL flag is given the caller
+ * promises that the region specified of the clip rects is filled
+ * completely with a single color as given in the color argument.
+ */
+
+struct drm_mode_fb_dirty_cmd {
+	__u32 fb_id;
+	__u32 flags; /* DRM_MODE_FB_DIRTY_ANNOTATE_* */
+	__u32 color; /* fill color, used with DRM_MODE_FB_DIRTY_ANNOTATE_FILL */
+	__u32 num_clips; /* number of clip rects; presumably capped by DRM_MODE_FB_DIRTY_MAX_CLIPS - confirm */
+	__u64 clips_ptr; /* presumably user pointer to the clip rect array - confirm element type */
+};
+
+struct drm_mode_mode_cmd { /* pairs a connector with a display mode; NOTE(review): usage not documented here */
+	__u32 connector_id; /* presumably object ID of the connector the mode applies to */
+	struct drm_mode_modeinfo mode; /* display timings, see struct drm_mode_modeinfo */
+};
+
+#define DRM_MODE_CURSOR_BO	0x01
+#define DRM_MODE_CURSOR_MOVE	0x02
+#define DRM_MODE_CURSOR_FLAGS	0x03 /* both bits above combined (0x01 | 0x02) */
+
+/*
+ * depending on the value in flags different members are used.
+ *
+ * CURSOR_BO uses
+ *    crtc_id
+ *    width
+ *    height
+ *    handle - if 0 turns the cursor off
+ *
+ * CURSOR_MOVE uses
+ *    crtc_id
+ *    x
+ *    y
+ */
+struct drm_mode_cursor {
+	__u32 flags; /* DRM_MODE_CURSOR_BO and/or DRM_MODE_CURSOR_MOVE */
+	__u32 crtc_id; /* used by both CURSOR_BO and CURSOR_MOVE */
+	__s32 x; /* CURSOR_MOVE only */
+	__s32 y; /* CURSOR_MOVE only */
+	__u32 width; /* CURSOR_BO only */
+	__u32 height; /* CURSOR_BO only */
+	/* driver specific handle */
+	__u32 handle; /* CURSOR_BO only; 0 turns the cursor off */
+};
+
+struct drm_mode_cursor2 { /* same leading fields as struct drm_mode_cursor, plus hot_x/hot_y */
+	__u32 flags;
+	__u32 crtc_id;
+	__s32 x;
+	__s32 y;
+	__u32 width;
+	__u32 height;
+	/* driver specific handle */
+	__u32 handle;
+	__s32 hot_x; /* presumably cursor hotspot x offset - confirm */
+	__s32 hot_y; /* presumably cursor hotspot y offset - confirm */
+};
+
+struct drm_mode_crtc_lut { /* per-CRTC lookup table with separate red/green/blue arrays */
+	__u32 crtc_id;
+	__u32 gamma_size; /* presumably number of entries in each array - confirm */
+
+	/* pointers to arrays */
+	__u64 red; /* NOTE(review): element type of the arrays is not shown here */
+	__u64 green;
+	__u64 blue;
+};
+
+struct drm_color_ctm { /* 3x3 color conversion matrix */
+	/*
+	 * Conversion matrix in S31.32 sign-magnitude
+	 * (not two's complement!) format.
+	 *
+	 * out   matrix    in
+	 * |R|   |0 1 2|   |R|
+	 * |G| = |3 4 5| x |G|
+	 * |B|   |6 7 8|   |B|
+	 */
+	__u64 matrix[9]; /* indices 0..8 laid out row by row, as in the diagram above */
+};
+
+struct drm_color_lut { /* one lookup-table entry: 16-bit red, green and blue values */
+	/*
+	 * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and
+	 * 0xffff == 1.0.
+	 */
+	__u16 red;
+	__u16 green;
+	__u16 blue;
+	__u16 reserved; /* NOTE(review): required value not documented here */
+};
+
+/**
+ * struct drm_plane_size_hint - Plane size hints
+ * @width: The width of the plane in pixels
+ * @height: The height of the plane in pixels
+ *
+ * The plane SIZE_HINTS property blob contains an
+ * array of struct drm_plane_size_hint.
+ */
+struct drm_plane_size_hint {
+	__u16 width;
+	__u16 height;
+};
+
+/**
+ * struct hdr_metadata_infoframe - HDR Metadata Infoframe Data.
+ *
+ * HDR Metadata Infoframe as per CTA 861.G spec. This is expected
+ * to match exactly with the spec.
+ *
+ * Userspace is expected to pass the metadata information as per
+ * the format described in this structure.
+ */
+struct hdr_metadata_infoframe {
+	/**
+	 * @eotf: Electro-Optical Transfer Function (EOTF)
+	 * used in the stream.
+	 */
+	__u8 eotf;
+	/**
+	 * @metadata_type: Static_Metadata_Descriptor_ID.
+	 */
+	__u8 metadata_type;
+	/**
+	 * @display_primaries: Color Primaries of the Data.
+	 * These are coded as unsigned 16-bit values in units of
+	 * 0.00002, where 0x0000 represents zero and 0xC350
+	 * (50000 decimal) represents 1.0000.
+	 * @display_primaries.x: X coordinate of color primary.
+	 * @display_primaries.y: Y coordinate of color primary.
+	 */
+	struct {
+		__u16 x, y;
+	} display_primaries[3]; /* three primaries; NOTE(review): their order is not stated here */
+	/**
+	 * @white_point: White Point of Colorspace Data.
+	 * These are coded as unsigned 16-bit values in units of
+	 * 0.00002, where 0x0000 represents zero and 0xC350
+	 * (50000 decimal) represents 1.0000.
+	 * @white_point.x: X coordinate of whitepoint of color primary.
+	 * @white_point.y: Y coordinate of whitepoint of color primary.
+	 */
+	struct {
+		__u16 x, y;
+	} white_point;
+	/**
+	 * @max_display_mastering_luminance: Max Mastering Display Luminance.
+	 * This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
+	 * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
+	 */
+	__u16 max_display_mastering_luminance;
+	/**
+	 * @min_display_mastering_luminance: Min Mastering Display Luminance.
+	 * This value is coded as an unsigned 16-bit value in units of
+	 * 0.0001 cd/m2, where 0x0001 represents 0.0001 cd/m2 and 0xFFFF
+	 * represents 6.5535 cd/m2.
+	 */
+	__u16 min_display_mastering_luminance;
+	/**
+	 * @max_cll: Max Content Light Level.
+	 * This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
+	 * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
+	 */
+	__u16 max_cll;
+	/**
+	 * @max_fall: Max Frame Average Light Level.
+	 * This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
+	 * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
+	 */
+	__u16 max_fall;
+};
+
+/**
+ * struct hdr_output_metadata - HDR output metadata
+ *
+ * Metadata Information to be passed from userspace
+ */
+struct hdr_output_metadata {
+	/**
+	 * @metadata_type: Static_Metadata_Descriptor_ID.
+	 */
+	__u32 metadata_type;
+	/**
+	 * @hdmi_metadata_type1: HDR Metadata Infoframe.
+	 */
+	union { /* anonymous union; currently holds a single member */
+		struct hdr_metadata_infoframe hdmi_metadata_type1;
+	};
+};
+
+/**
+ * DRM_MODE_PAGE_FLIP_EVENT
+ *
+ * Request that the kernel sends back a vblank event (see
+ * struct drm_event_vblank) with the &DRM_EVENT_FLIP_COMPLETE type when the
+ * page-flip is done.
+ */
+#define DRM_MODE_PAGE_FLIP_EVENT 0x01
+/**
+ * DRM_MODE_PAGE_FLIP_ASYNC
+ *
+ * Request that the page-flip is performed as soon as possible, ie. with no
+ * delay due to waiting for vblank. This may cause tearing to be visible on
+ * the screen.
+ *
+ * When used with atomic uAPI, the driver will return an error if the hardware
+ * doesn't support performing an asynchronous page-flip for this update.
+ * User-space should handle this, e.g. by falling back to a regular page-flip.
+ *
+ * Note, some hardware might need to perform one last synchronous page-flip
+ * before being able to switch to asynchronous page-flips. As an exception,
+ * the driver will return success even though that first page-flip is not
+ * asynchronous.
+ */
+#define DRM_MODE_PAGE_FLIP_ASYNC 0x02
+#define DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE 0x4
+#define DRM_MODE_PAGE_FLIP_TARGET_RELATIVE 0x8
+#define DRM_MODE_PAGE_FLIP_TARGET (DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE | \
+				   DRM_MODE_PAGE_FLIP_TARGET_RELATIVE)
+/**
+ * DRM_MODE_PAGE_FLIP_FLAGS
+ *
+ * Bitmask of flags suitable for &drm_mode_crtc_page_flip_target.flags.
+ */
+#define DRM_MODE_PAGE_FLIP_FLAGS (DRM_MODE_PAGE_FLIP_EVENT | \
+				  DRM_MODE_PAGE_FLIP_ASYNC | \
+				  DRM_MODE_PAGE_FLIP_TARGET)
+
+/*
+ * Request a page flip on the specified crtc.
+ *
+ * This ioctl will ask KMS to schedule a page flip for the specified
+ * crtc.  Once any pending rendering targeting the specified fb (as of
+ * ioctl time) has completed, the crtc will be reprogrammed to display
+ * that fb after the next vertical refresh.  The ioctl returns
+ * immediately, but subsequent rendering to the current fb will block
+ * in the execbuffer ioctl until the page flip happens.  If a page
+ * flip is already pending as the ioctl is called, EBUSY will be
+ * returned.
+ *
+ * Flag DRM_MODE_PAGE_FLIP_EVENT requests that drm sends back a vblank
+ * event (see drm.h: struct drm_event_vblank) when the page flip is
+ * done.  The user_data field passed in with this ioctl will be
+ * returned as the user_data field in the vblank event struct.
+ *
+ * Flag DRM_MODE_PAGE_FLIP_ASYNC requests that the flip happen
+ * 'as soon as possible', meaning that it does not wait for vblank.
+ * This may cause tearing on the screen.
+ *
+ * The reserved field must be zero.
+ */
+
+struct drm_mode_crtc_page_flip {
+	__u32 crtc_id;
+	__u32 fb_id;
+	__u32 flags;
+	__u32 reserved;
+	__u64 user_data;
+};
+
+/*
+ * Request a page flip on the specified crtc.
+ *
+ * Same as struct drm_mode_crtc_page_flip, but supports new flags and
+ * re-purposes the reserved field:
+ *
+ * The sequence field must be zero unless either of the
+ * DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags is specified. When
+ * the ABSOLUTE flag is specified, the sequence field denotes the absolute
+ * vblank sequence when the flip should take effect. When the RELATIVE
+ * flag is specified, the sequence field denotes the relative (to the
+ * current one when the ioctl is called) vblank sequence when the flip
+ * should take effect. NOTE: DRM_IOCTL_WAIT_VBLANK must still be used to
+ * make sure the vblank sequence before the target one has passed before
+ * calling this ioctl. The purpose of the
+ * DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags is merely to clarify
+ * the target for when code dealing with a page flip runs during a
+ * vertical blank period.
+ */
+
+struct drm_mode_crtc_page_flip_target {
+	__u32 crtc_id;
+	__u32 fb_id;
+	__u32 flags;
+	__u32 sequence;
+	__u64 user_data;
+};
+
+/**
+ * struct drm_mode_create_dumb - Create a KMS dumb buffer for scanout.
+ * @height: buffer height in pixels
+ * @width: buffer width in pixels
+ * @bpp: bits per pixel
+ * @flags: must be zero
+ * @handle: buffer object handle
+ * @pitch: number of bytes between two consecutive lines
+ * @size: size of the whole buffer in bytes
+ *
+ * User-space fills @height, @width, @bpp and @flags. If the IOCTL succeeds,
+ * the kernel fills @handle, @pitch and @size.
+ */
+struct drm_mode_create_dumb {
+	__u32 height;
+	__u32 width;
+	__u32 bpp;
+	__u32 flags;
+
+	__u32 handle;
+	__u32 pitch;
+	__u64 size;
+};
+
+/* set up for mmap of a dumb scanout buffer */
+struct drm_mode_map_dumb {
+	/** Handle for the object being mapped. */
+	__u32 handle;
+	__u32 pad;
+	/**
+	 * Fake offset to use for subsequent mmap call
+	 *
+	 * This is a fixed-size type for 32/64 compatibility.
+	 */
+	__u64 offset;
+};
+
+struct drm_mode_destroy_dumb {
+	__u32 handle;
+};
+
+/**
+ * DRM_MODE_ATOMIC_TEST_ONLY
+ *
+ * Do not apply the atomic commit, instead check whether the hardware supports
+ * this configuration.
+ *
+ * See &drm_mode_config_funcs.atomic_check for more details on test-only
+ * commits.
+ */
+#define DRM_MODE_ATOMIC_TEST_ONLY 0x0100
+/**
+ * DRM_MODE_ATOMIC_NONBLOCK
+ *
+ * Do not block while applying the atomic commit. The &DRM_IOCTL_MODE_ATOMIC
+ * IOCTL returns immediately instead of waiting for the changes to be applied
+ * in hardware. Note, the driver will still check that the update can be
+ * applied before returning.
+ */
+#define DRM_MODE_ATOMIC_NONBLOCK  0x0200
+/**
+ * DRM_MODE_ATOMIC_ALLOW_MODESET
+ *
+ * Allow the update to result in temporary or transient visible artifacts while
+ * the update is being applied. Applying the update may also take significantly
+ * more time than a page flip. All visual artifacts will disappear by the time
+ * the update is completed, as signalled through the vblank event's timestamp
+ * (see struct drm_event_vblank).
+ *
+ * This flag must be set when the KMS update might cause visible artifacts.
+ * Without this flag such KMS update will return a EINVAL error. What kind of
+ * update may cause visible artifacts depends on the driver and the hardware.
+ * User-space that needs to know beforehand if an update might cause visible
+ * artifacts can use &DRM_MODE_ATOMIC_TEST_ONLY without
+ * &DRM_MODE_ATOMIC_ALLOW_MODESET to see if it fails.
+ *
+ * To the best of the driver's knowledge, visual artifacts are guaranteed to
+ * not appear when this flag is not set. Some sinks might display visual
+ * artifacts outside of the driver's control.
+ */
+#define DRM_MODE_ATOMIC_ALLOW_MODESET 0x0400
+
+/**
+ * DRM_MODE_ATOMIC_FLAGS
+ *
+ * Bitfield of flags accepted by the &DRM_IOCTL_MODE_ATOMIC IOCTL in
+ * &drm_mode_atomic.flags.
+ */
+#define DRM_MODE_ATOMIC_FLAGS (\
+		DRM_MODE_PAGE_FLIP_EVENT |\
+		DRM_MODE_PAGE_FLIP_ASYNC |\
+		DRM_MODE_ATOMIC_TEST_ONLY |\
+		DRM_MODE_ATOMIC_NONBLOCK |\
+		DRM_MODE_ATOMIC_ALLOW_MODESET)
+
+struct drm_mode_atomic {
+	__u32 flags;
+	__u32 count_objs;
+	__u64 objs_ptr;
+	__u64 count_props_ptr;
+	__u64 props_ptr;
+	__u64 prop_values_ptr;
+	__u64 reserved;
+	__u64 user_data;
+};
+
+struct drm_format_modifier_blob {
+#define FORMAT_BLOB_CURRENT 1
+	/* Version of this blob format */
+	__u32 version;
+
+	/* Flags */
+	__u32 flags;
+
+	/* Number of fourcc formats supported */
+	__u32 count_formats;
+
+	/* Where in this blob the formats exist (in bytes) */
+	__u32 formats_offset;
+
+	/* Number of drm_format_modifiers */
+	__u32 count_modifiers;
+
+	/* Where in this blob the modifiers exist (in bytes) */
+	__u32 modifiers_offset;
+
+	/* __u32 formats[] */
+	/* struct drm_format_modifier modifiers[] */
+};
+
+struct drm_format_modifier {
+	/* Bitmask of formats in get_plane format list this info applies to. The
+	 * offset selects which sliding window of 64 formats (bits) is described.
+	 *
+	 * Some examples:
+	 * In today's world with < 65 formats, where formats 0 and 2 are
+	 * supported:
+	 * 0x0000000000000005
+	 *		  ^-offset = 0, formats = 5
+	 *
+	 * If the number of formats grew to 128, and formats 98-102 are
+	 * supported with the modifier:
+	 *
+	 * 0x0000007c00000000 0000000000000000
+	 *		  ^
+	 *		  |__offset = 64, formats = 0x7c00000000
+	 *
+	 */
+	__u64 formats;
+	__u32 offset;
+	__u32 pad;
+
+	/* The modifier that applies to the get_plane format list bitmask. */
+	__u64 modifier;
+};
+
+/**
+ * struct drm_mode_create_blob - Create New blob property
+ *
+ * Create a new 'blob' data property, copying length bytes from data pointer,
+ * and returning new blob ID.
+ */
+struct drm_mode_create_blob {
+	/** @data: Pointer to data to copy. */
+	__u64 data;
+	/** @length: Length of data to copy. */
+	__u32 length;
+	/** @blob_id: Return: new property ID. */
+	__u32 blob_id;
+};
+
+/**
+ * struct drm_mode_destroy_blob - Destroy user blob
+ * @blob_id: blob_id to destroy
+ *
+ * Destroy a user-created blob property.
+ *
+ * User-space can release blobs as soon as they do not need to refer to them by
+ * their blob object ID.  For instance, if you are using a MODE_ID blob in an
+ * atomic commit and you will not make another commit re-using the same ID, you
+ * can destroy the blob as soon as the commit has been issued, without waiting
+ * for it to complete.
+ */
+struct drm_mode_destroy_blob {
+	__u32 blob_id;
+};
+
+/**
+ * struct drm_mode_create_lease - Create lease
+ *
+ * Lease mode resources, creating another drm_master.
+ *
+ * The @object_ids array must reference at least one CRTC, one connector and
+ * one plane if &DRM_CLIENT_CAP_UNIVERSAL_PLANES is enabled. Alternatively,
+ * the lease can be completely empty.
+ */
+struct drm_mode_create_lease {
+	/** @object_ids: Pointer to array of object ids (__u32) */
+	__u64 object_ids;
+	/** @object_count: Number of object ids */
+	__u32 object_count;
+	/** @flags: flags for new FD (O_CLOEXEC, etc) */
+	__u32 flags;
+
+	/** @lessee_id: Return: unique identifier for lessee. */
+	__u32 lessee_id;
+	/** @fd: Return: file descriptor to new drm_master file */
+	__u32 fd;
+};
+
+/**
+ * struct drm_mode_list_lessees - List lessees
+ *
+ * List lessees from a drm_master.
+ */
+struct drm_mode_list_lessees {
+	/**
+	 * @count_lessees: Number of lessees.
+	 *
+	 * On input, provides length of the array.
+	 * On output, provides total number. No
+	 * more than the input number will be written
+	 * back, so two calls can be used to get
+	 * the size and then the data.
+	 */
+	__u32 count_lessees;
+	/** @pad: Padding. */
+	__u32 pad;
+
+	/**
+	 * @lessees_ptr: Pointer to lessees.
+	 *
+	 * Pointer to __u64 array of lessee ids
+	 */
+	__u64 lessees_ptr;
+};
+
+/**
+ * struct drm_mode_get_lease - Get Lease
+ *
+ * Get leased objects.
+ */
+struct drm_mode_get_lease {
+	/**
+	 * @count_objects: Number of leased objects.
+	 *
+	 * On input, provides length of the array.
+	 * On output, provides total number. No
+	 * more than the input number will be written
+	 * back, so two calls can be used to get
+	 * the size and then the data.
+	 */
+	__u32 count_objects;
+	/** @pad: Padding. */
+	__u32 pad;
+
+	/**
+	 * @objects_ptr: Pointer to objects.
+	 *
+	 * Pointer to __u32 array of object ids.
+	 */
+	__u64 objects_ptr;
+};
+
+/**
+ * struct drm_mode_revoke_lease - Revoke lease
+ */
+struct drm_mode_revoke_lease {
+	/** @lessee_id: Unique ID of lessee */
+	__u32 lessee_id;
+};
+
+/**
+ * struct drm_mode_rect - Two dimensional rectangle.
+ * @x1: Horizontal starting coordinate (inclusive).
+ * @y1: Vertical starting coordinate (inclusive).
+ * @x2: Horizontal ending coordinate (exclusive).
+ * @y2: Vertical ending coordinate (exclusive).
+ *
+ * The drm subsystem uses struct drm_rect to manage rectangular areas; this
+ * struct exports that representation to user-space.
+ *
+ * Currently used by drm_mode_atomic blob property FB_DAMAGE_CLIPS.
+ */
+struct drm_mode_rect {
+	__s32 x1;
+	__s32 y1;
+	__s32 x2;
+	__s32 y2;
+};
+
+/**
+ * struct drm_mode_closefb
+ * @fb_id: Framebuffer ID.
+ * @pad: Must be zero.
+ */
+struct drm_mode_closefb {
+	__u32 fb_id;
+	__u32 pad;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/plugins/amdgpu/kfd_ioctl.h b/plugins/amdgpu/kfd_ioctl.h
index b88fe20cf..a63d453f0 100644
--- a/plugins/amdgpu/kfd_ioctl.h
+++ b/plugins/amdgpu/kfd_ioctl.h
@@ -23,9 +23,12 @@
 #ifndef KFD_IOCTL_H_INCLUDED
 #define KFD_IOCTL_H_INCLUDED
 
-#include <drm/drm.h>
 #include <linux/ioctl.h>
 
+/* Define __user as empty for kernel headers in user-space */
+#define __user
+#include "drm.h"
+
 /*
  * - 1.1 - initial version
  * - 1.3 - Add SMI events support
@@ -39,8 +42,8 @@
 #define KFD_IOCTL_MINOR_VERSION 8
 
 struct kfd_ioctl_get_version_args {
-	__u32 major_version; /* from KFD */
-	__u32 minor_version; /* from KFD */
+	uint32_t major_version; /* from KFD */
+	uint32_t minor_version; /* from KFD */
 };
 
 /* For kfd_ioctl_create_queue_args.queue_type. */
@@ -53,51 +56,51 @@ struct kfd_ioctl_get_version_args {
 #define KFD_MAX_QUEUE_PRIORITY	 15
 
 struct kfd_ioctl_create_queue_args {
-	__u64 ring_base_address;     /* to KFD */
-	__u64 write_pointer_address; /* from KFD */
-	__u64 read_pointer_address;  /* from KFD */
-	__u64 doorbell_offset;	     /* from KFD */
+	uint64_t ring_base_address;	/* to KFD */
+	uint64_t write_pointer_address; /* from KFD */
+	uint64_t read_pointer_address;	/* from KFD */
+	uint64_t doorbell_offset;	/* from KFD */
 
-	__u32 ring_size;	/* to KFD */
-	__u32 gpu_id;		/* to KFD */
-	__u32 queue_type;	/* to KFD */
-	__u32 queue_percentage; /* to KFD */
-	__u32 queue_priority;	/* to KFD */
-	__u32 queue_id;		/* from KFD */
+	uint32_t ring_size;	   /* to KFD */
+	uint32_t gpu_id;	   /* to KFD */
+	uint32_t queue_type;	   /* to KFD */
+	uint32_t queue_percentage; /* to KFD */
+	uint32_t queue_priority;   /* to KFD */
+	uint32_t queue_id;	   /* from KFD */
 
-	__u64 eop_buffer_address;	/* to KFD */
-	__u64 eop_buffer_size;		/* to KFD */
-	__u64 ctx_save_restore_address; /* to KFD */
-	__u32 ctx_save_restore_size;	/* to KFD */
-	__u32 ctl_stack_size;		/* to KFD */
+	uint64_t eop_buffer_address;	   /* to KFD */
+	uint64_t eop_buffer_size;	   /* to KFD */
+	uint64_t ctx_save_restore_address; /* to KFD */
+	uint32_t ctx_save_restore_size;	   /* to KFD */
+	uint32_t ctl_stack_size;	   /* to KFD */
 };
 
 struct kfd_ioctl_destroy_queue_args {
-	__u32 queue_id; /* to KFD */
-	__u32 pad;
+	uint32_t queue_id; /* to KFD */
+	uint32_t pad;
 };
 
 struct kfd_ioctl_update_queue_args {
-	__u64 ring_base_address; /* to KFD */
+	uint64_t ring_base_address; /* to KFD */
 
-	__u32 queue_id;		/* to KFD */
-	__u32 ring_size;	/* to KFD */
-	__u32 queue_percentage; /* to KFD */
-	__u32 queue_priority;	/* to KFD */
+	uint32_t queue_id;	   /* to KFD */
+	uint32_t ring_size;	   /* to KFD */
+	uint32_t queue_percentage; /* to KFD */
+	uint32_t queue_priority;   /* to KFD */
 };
 
 struct kfd_ioctl_set_cu_mask_args {
-	__u32 queue_id;	   /* to KFD */
-	__u32 num_cu_mask; /* to KFD */
-	__u64 cu_mask_ptr; /* to KFD */
+	uint32_t queue_id;    /* to KFD */
+	uint32_t num_cu_mask; /* to KFD */
+	uint64_t cu_mask_ptr; /* to KFD */
 };
 
 struct kfd_ioctl_get_queue_wave_state_args {
-	__u64 ctl_stack_address;   /* to KFD */
-	__u32 ctl_stack_used_size; /* from KFD */
-	__u32 save_area_used_size; /* from KFD */
-	__u32 queue_id;		   /* to KFD */
-	__u32 pad;
+	uint64_t ctl_stack_address;   /* to KFD */
+	uint32_t ctl_stack_used_size; /* from KFD */
+	uint32_t save_area_used_size; /* from KFD */
+	uint32_t queue_id;	      /* to KFD */
+	uint32_t pad;
 };
 
 /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
@@ -105,13 +108,13 @@ struct kfd_ioctl_get_queue_wave_state_args {
 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
 
 struct kfd_ioctl_set_memory_policy_args {
-	__u64 alternate_aperture_base; /* to KFD */
-	__u64 alternate_aperture_size; /* to KFD */
+	uint64_t alternate_aperture_base; /* to KFD */
+	uint64_t alternate_aperture_size; /* to KFD */
 
-	__u32 gpu_id;		/* to KFD */
-	__u32 default_policy;	/* to KFD */
-	__u32 alternate_policy; /* to KFD */
-	__u32 pad;
+	uint32_t gpu_id;	   /* to KFD */
+	uint32_t default_policy;   /* to KFD */
+	uint32_t alternate_policy; /* to KFD */
+	uint32_t pad;
 };
 
 /*
@@ -122,24 +125,24 @@ struct kfd_ioctl_set_memory_policy_args {
  */
 
 struct kfd_ioctl_get_clock_counters_args {
-	__u64 gpu_clock_counter;    /* from KFD */
-	__u64 cpu_clock_counter;    /* from KFD */
-	__u64 system_clock_counter; /* from KFD */
-	__u64 system_clock_freq;    /* from KFD */
+	uint64_t gpu_clock_counter;    /* from KFD */
+	uint64_t cpu_clock_counter;    /* from KFD */
+	uint64_t system_clock_counter; /* from KFD */
+	uint64_t system_clock_freq;    /* from KFD */
 
-	__u32 gpu_id; /* to KFD */
-	__u32 pad;
+	uint32_t gpu_id; /* to KFD */
+	uint32_t pad;
 };
 
 struct kfd_process_device_apertures {
-	__u64 lds_base;	     /* from KFD */
-	__u64 lds_limit;     /* from KFD */
-	__u64 scratch_base;  /* from KFD */
-	__u64 scratch_limit; /* from KFD */
-	__u64 gpuvm_base;    /* from KFD */
-	__u64 gpuvm_limit;   /* from KFD */
-	__u32 gpu_id;	     /* from KFD */
-	__u32 pad;
+	uint64_t lds_base;	/* from KFD */
+	uint64_t lds_limit;	/* from KFD */
+	uint64_t scratch_base;	/* from KFD */
+	uint64_t scratch_limit; /* from KFD */
+	uint64_t gpuvm_base;	/* from KFD */
+	uint64_t gpuvm_limit;	/* from KFD */
+	uint32_t gpu_id;	/* from KFD */
+	uint32_t pad;
 };
 
 /*
@@ -152,20 +155,20 @@ struct kfd_ioctl_get_process_apertures_args {
 	struct kfd_process_device_apertures process_apertures[NUM_OF_SUPPORTED_GPUS]; /* from KFD */
 
 	/* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */
-	__u32 num_of_nodes;
-	__u32 pad;
+	uint32_t num_of_nodes;
+	uint32_t pad;
 };
 
 struct kfd_ioctl_get_process_apertures_new_args {
 	/* User allocated. Pointer to struct kfd_process_device_apertures
 	 * filled in by Kernel
 	 */
-	__u64 kfd_process_device_apertures_ptr;
+	uint64_t kfd_process_device_apertures_ptr;
 	/* to KFD - indicates amount of memory present in kfd_process_device_apertures_ptr
 	 * from KFD - Number of entries filled by KFD.
 	 */
-	__u32 num_of_nodes;
-	__u32 pad;
+	uint32_t num_of_nodes;
+	uint32_t pad;
 };
 
 #define MAX_ALLOWED_NUM_POINTS	  100
@@ -173,25 +176,25 @@ struct kfd_ioctl_get_process_apertures_new_args {
 #define MAX_ALLOWED_WAC_BUFF_SIZE 128
 
 struct kfd_ioctl_dbg_register_args {
-	__u32 gpu_id; /* to KFD */
-	__u32 pad;
+	uint32_t gpu_id; /* to KFD */
+	uint32_t pad;
 };
 
 struct kfd_ioctl_dbg_unregister_args {
-	__u32 gpu_id; /* to KFD */
-	__u32 pad;
+	uint32_t gpu_id; /* to KFD */
+	uint32_t pad;
 };
 
 struct kfd_ioctl_dbg_address_watch_args {
-	__u64 content_ptr;	 /* a pointer to the actual content */
-	__u32 gpu_id;		 /* to KFD */
-	__u32 buf_size_in_bytes; /*including gpu_id and buf_size */
+	uint64_t content_ptr;	    /* a pointer to the actual content */
+	uint32_t gpu_id;	    /* to KFD */
+	uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */
 };
 
 struct kfd_ioctl_dbg_wave_control_args {
-	__u64 content_ptr;	 /* a pointer to the actual content */
-	__u32 gpu_id;		 /* to KFD */
-	__u32 buf_size_in_bytes; /*including gpu_id and buf_size */
+	uint64_t content_ptr;	    /* a pointer to the actual content */
+	uint32_t gpu_id;	    /* to KFD */
+	uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */
 };
 
 #define KFD_INVALID_FD 0xffffffff
@@ -228,43 +231,43 @@ struct kfd_ioctl_dbg_wave_control_args {
 #define KFD_MEM_ERR_GPU_HANG	    3
 
 struct kfd_ioctl_create_event_args {
-	__u64 event_page_offset;  /* from KFD */
-	__u32 event_trigger_data; /* from KFD - signal events only */
-	__u32 event_type;	  /* to KFD */
-	__u32 auto_reset;	  /* to KFD */
-	__u32 node_id;		  /* to KFD - only valid for certain event types */
-	__u32 event_id;		  /* from KFD */
-	__u32 event_slot_index;	  /* from KFD */
+	uint64_t event_page_offset;  /* from KFD */
+	uint32_t event_trigger_data; /* from KFD - signal events only */
+	uint32_t event_type;	     /* to KFD */
+	uint32_t auto_reset;	     /* to KFD */
+	uint32_t node_id;	     /* to KFD - only valid for certain event types */
+	uint32_t event_id;	     /* from KFD */
+	uint32_t event_slot_index;   /* from KFD */
 };
 
 struct kfd_ioctl_destroy_event_args {
-	__u32 event_id; /* to KFD */
-	__u32 pad;
+	uint32_t event_id; /* to KFD */
+	uint32_t pad;
 };
 
 struct kfd_ioctl_set_event_args {
-	__u32 event_id; /* to KFD */
-	__u32 pad;
+	uint32_t event_id; /* to KFD */
+	uint32_t pad;
 };
 
 struct kfd_ioctl_reset_event_args {
-	__u32 event_id; /* to KFD */
-	__u32 pad;
+	uint32_t event_id; /* to KFD */
+	uint32_t pad;
 };
 
 struct kfd_memory_exception_failure {
-	__u32 NotPresent; /* Page not present or supervisor privilege */
-	__u32 ReadOnly;	  /* Write access to a read-only page */
-	__u32 NoExecute;  /* Execute access to a page marked NX */
-	__u32 imprecise;  /* Can't determine the exact fault address */
+	uint32_t NotPresent; /* Page not present or supervisor privilege */
+	uint32_t ReadOnly;   /* Write access to a read-only page */
+	uint32_t NoExecute;  /* Execute access to a page marked NX */
+	uint32_t imprecise;  /* Can't determine the exact fault address */
 };
 
 /* memory exception data */
 struct kfd_hsa_memory_exception_data {
 	struct kfd_memory_exception_failure failure;
-	__u64 va;
-	__u32 gpu_id;
-	__u32 ErrorType; /* 0 = no RAS error,
+	uint64_t va;
+	uint32_t gpu_id;
+	uint32_t ErrorType; /* 0 = no RAS error,
 			  * 1 = ECC_SRAM,
 			  * 2 = Link_SYNFLOOD (poison),
 			  * 3 = GPU hang (not attributable to a specific cause),
@@ -274,10 +277,10 @@ struct kfd_hsa_memory_exception_data {
 
 /* hw exception data */
 struct kfd_hsa_hw_exception_data {
-	__u32 reset_type;
-	__u32 reset_cause;
-	__u32 memory_lost;
-	__u32 gpu_id;
+	uint32_t reset_type;
+	uint32_t reset_cause;
+	uint32_t memory_lost;
+	uint32_t gpu_id;
 };
 
 /* Event data */
@@ -286,57 +289,57 @@ struct kfd_event_data {
 		struct kfd_hsa_memory_exception_data memory_exception_data;
 		struct kfd_hsa_hw_exception_data hw_exception_data;
 	};			  /* From KFD */
-	__u64 kfd_event_data_ext; /* pointer to an extension structure for future exception types */
-	__u32 event_id;		  /* to KFD */
-	__u32 pad;
+	uint64_t kfd_event_data_ext; /* pointer to an extension structure for future exception types */
+	uint32_t event_id;	     /* to KFD */
+	uint32_t pad;
 };
 
 struct kfd_ioctl_wait_events_args {
-	__u64 events_ptr;   /* pointed to struct kfd_event_data array, to KFD */
-	__u32 num_events;   /* to KFD */
-	__u32 wait_for_all; /* to KFD */
-	__u32 timeout;	    /* to KFD */
-	__u32 wait_result;  /* from KFD */
+	uint64_t events_ptr;   /* pointed to struct kfd_event_data array, to KFD */
+	uint32_t num_events;   /* to KFD */
+	uint32_t wait_for_all; /* to KFD */
+	uint32_t timeout;      /* to KFD */
+	uint32_t wait_result;  /* from KFD */
 };
 
 struct kfd_ioctl_set_scratch_backing_va_args {
-	__u64 va_addr; /* to KFD */
-	__u32 gpu_id;  /* to KFD */
-	__u32 pad;
+	uint64_t va_addr; /* to KFD */
+	uint32_t gpu_id;  /* to KFD */
+	uint32_t pad;
 };
 
 struct kfd_ioctl_get_tile_config_args {
 	/* to KFD: pointer to tile array */
-	__u64 tile_config_ptr;
+	uint64_t tile_config_ptr;
 	/* to KFD: pointer to macro tile array */
-	__u64 macro_tile_config_ptr;
+	uint64_t macro_tile_config_ptr;
 	/* to KFD: array size allocated by user mode
 	 * from KFD: array size filled by kernel
 	 */
-	__u32 num_tile_configs;
+	uint32_t num_tile_configs;
 	/* to KFD: array size allocated by user mode
 	 * from KFD: array size filled by kernel
 	 */
-	__u32 num_macro_tile_configs;
+	uint32_t num_macro_tile_configs;
 
-	__u32 gpu_id;	      /* to KFD */
-	__u32 gb_addr_config; /* from KFD */
-	__u32 num_banks;      /* from KFD */
-	__u32 num_ranks;      /* from KFD */
+	uint32_t gpu_id;	 /* to KFD */
+	uint32_t gb_addr_config; /* from KFD */
+	uint32_t num_banks;	 /* from KFD */
+	uint32_t num_ranks;	 /* from KFD */
 
 	/* struct size can be extended later if needed without breaking ABI compatibility */
 };
 
 struct kfd_ioctl_set_trap_handler_args {
-	__u64 tba_addr; /* to KFD */
-	__u64 tma_addr; /* to KFD */
-	__u32 gpu_id;	/* to KFD */
-	__u32 pad;
+	uint64_t tba_addr; /* to KFD */
+	uint64_t tma_addr; /* to KFD */
+	uint32_t gpu_id;   /* to KFD */
+	uint32_t pad;
 };
 
 struct kfd_ioctl_acquire_vm_args {
-	__u32 drm_fd; /* to KFD */
-	__u32 gpu_id; /* to KFD */
+	uint32_t drm_fd; /* to KFD */
+	uint32_t gpu_id; /* to KFD */
 };
 
 /* Allocation flags: memory types */
@@ -367,12 +370,12 @@ struct kfd_ioctl_acquire_vm_args {
  * @flags:       memory type and attributes. See KFD_IOC_ALLOC_MEM_FLAGS above
  */
 struct kfd_ioctl_alloc_memory_of_gpu_args {
-	__u64 va_addr;	   /* to KFD */
-	__u64 size;	   /* to KFD */
-	__u64 handle;	   /* from KFD */
-	__u64 mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */
-	__u32 gpu_id;	   /* to KFD */
-	__u32 flags;
+	uint64_t va_addr;     /* to KFD */
+	uint64_t size;	      /* to KFD */
+	uint64_t handle;      /* from KFD */
+	uint64_t mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */
+	uint32_t gpu_id;      /* to KFD */
+	uint32_t flags;
 };
 
 /* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu
@@ -380,13 +383,13 @@ struct kfd_ioctl_alloc_memory_of_gpu_args {
  * @handle: memory handle returned by alloc
  */
 struct kfd_ioctl_free_memory_of_gpu_args {
-	__u64 handle; /* to KFD */
+	uint64_t handle; /* to KFD */
 };
 
 /* Map memory to one or more GPUs
  *
  * @handle:                memory handle returned by alloc
- * @device_ids_array_ptr:  array of gpu_ids (__u32 per device)
+ * @device_ids_array_ptr:  array of gpu_ids (uint32_t per device)
  * @n_devices:             number of devices in the array
  * @n_success:             number of devices mapped successfully
  *
@@ -399,10 +402,10 @@ struct kfd_ioctl_free_memory_of_gpu_args {
  * n_devices.
  */
 struct kfd_ioctl_map_memory_to_gpu_args {
-	__u64 handle;		    /* to KFD */
-	__u64 device_ids_array_ptr; /* to KFD */
-	__u32 n_devices;	    /* to KFD */
-	__u32 n_success;	    /* to/from KFD */
+	uint64_t handle;	       /* to KFD */
+	uint64_t device_ids_array_ptr; /* to KFD */
+	uint32_t n_devices;	       /* to KFD */
+	uint32_t n_success;	       /* to/from KFD */
 };
 
 /* Unmap memory from one or more GPUs
@@ -410,10 +413,10 @@ struct kfd_ioctl_map_memory_to_gpu_args {
  * same arguments as for mapping
  */
 struct kfd_ioctl_unmap_memory_from_gpu_args {
-	__u64 handle;		    /* to KFD */
-	__u64 device_ids_array_ptr; /* to KFD */
-	__u32 n_devices;	    /* to KFD */
-	__u32 n_success;	    /* to/from KFD */
+	uint64_t handle;	       /* to KFD */
+	uint64_t device_ids_array_ptr; /* to KFD */
+	uint32_t n_devices;	       /* to KFD */
+	uint32_t n_success;	       /* to/from KFD */
 };
 
 /* Allocate GWS for specific queue
@@ -424,28 +427,28 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
  *               only support contiguous GWS allocation
  */
 struct kfd_ioctl_alloc_queue_gws_args {
-	__u32 queue_id;	 /* to KFD */
-	__u32 num_gws;	 /* to KFD */
-	__u32 first_gws; /* from KFD */
-	__u32 pad;
+	uint32_t queue_id;  /* to KFD */
+	uint32_t num_gws;   /* to KFD */
+	uint32_t first_gws; /* from KFD */
+	uint32_t pad;
 };
 
 struct kfd_ioctl_get_dmabuf_info_args {
-	__u64 size;	     /* from KFD */
-	__u64 metadata_ptr;  /* to KFD */
-	__u32 metadata_size; /* to KFD (space allocated by user)
+	uint64_t size;		/* from KFD */
+	uint64_t metadata_ptr;	/* to KFD */
+	uint32_t metadata_size; /* to KFD (space allocated by user)
 			      * from KFD (actual metadata size)
 			      */
-	__u32 gpu_id;	     /* from KFD */
-	__u32 flags;	     /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */
-	__u32 dmabuf_fd;     /* to KFD */
+	uint32_t gpu_id;	/* from KFD */
+	uint32_t flags;		/* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */
+	uint32_t dmabuf_fd;	/* to KFD */
 };
 
 struct kfd_ioctl_import_dmabuf_args {
-	__u64 va_addr;	 /* to KFD */
-	__u64 handle;	 /* from KFD */
-	__u32 gpu_id;	 /* to KFD */
-	__u32 dmabuf_fd; /* to KFD */
+	uint64_t va_addr;   /* to KFD */
+	uint64_t handle;    /* from KFD */
+	uint32_t gpu_id;    /* to KFD */
+	uint32_t dmabuf_fd; /* to KFD */
 };
 
 /*
@@ -463,8 +466,8 @@ enum kfd_smi_event {
 #define KFD_SMI_EVENT_MSG_SIZE		 96
 
 struct kfd_ioctl_smi_events_args {
-	__u32 gpuid;   /* to KFD */
-	__u32 anon_fd; /* from KFD */
+	uint32_t gpuid;	  /* to KFD */
+	uint32_t anon_fd; /* from KFD */
 };
 
 /**************************************************************************************************
@@ -510,33 +513,33 @@ enum kfd_criu_op {
  * Return: 0 on success, -errno on failure
  */
 struct kfd_ioctl_criu_args {
-	__u64 devices;	      /* Used during ops: CHECKPOINT, RESTORE */
-	__u64 bos;	      /* Used during ops: CHECKPOINT, RESTORE */
-	__u64 priv_data;      /* Used during ops: CHECKPOINT, RESTORE */
-	__u64 priv_data_size; /* Used during ops: PROCESS_INFO, RESTORE */
-	__u32 num_devices;    /* Used during ops: PROCESS_INFO, RESTORE */
-	__u32 num_bos;	      /* Used during ops: PROCESS_INFO, RESTORE */
-	__u32 num_objects;    /* Used during ops: PROCESS_INFO, RESTORE */
-	__u32 pid;	      /* Used during ops: PROCESS_INFO, RESUME */
-	__u32 op;
+	uint64_t devices;	 /* Used during ops: CHECKPOINT, RESTORE */
+	uint64_t bos;		 /* Used during ops: CHECKPOINT, RESTORE */
+	uint64_t priv_data;	 /* Used during ops: CHECKPOINT, RESTORE */
+	uint64_t priv_data_size; /* Used during ops: PROCESS_INFO, RESTORE */
+	uint32_t num_devices;	 /* Used during ops: PROCESS_INFO, RESTORE */
+	uint32_t num_bos;	 /* Used during ops: PROCESS_INFO, RESTORE */
+	uint32_t num_objects;	 /* Used during ops: PROCESS_INFO, RESTORE */
+	uint32_t pid;		 /* Used during ops: PROCESS_INFO, RESUME */
+	uint32_t op;
 };
 
 struct kfd_criu_device_bucket {
-	__u32 user_gpu_id;
-	__u32 actual_gpu_id;
-	__u32 drm_fd;
-	__u32 pad;
+	uint32_t user_gpu_id;
+	uint32_t actual_gpu_id;
+	uint32_t drm_fd;
+	uint32_t pad;
 };
 
 struct kfd_criu_bo_bucket {
-	__u64 addr;
-	__u64 size;
-	__u64 offset;
-	__u64 restored_offset; /* During restore, updated offset for BO */
-	__u32 gpu_id;	       /* This is the user_gpu_id */
-	__u32 alloc_flags;
-	__u32 dmabuf_fd;
-	__u32 pad;
+	uint64_t addr;
+	uint64_t size;
+	uint64_t offset;
+	uint64_t restored_offset; /* During restore, updated offset for BO */
+	uint32_t gpu_id;	  /* This is the user_gpu_id */
+	uint32_t alloc_flags;
+	uint32_t dmabuf_fd;
+	uint32_t pad;
 };
 
 /* CRIU IOCTLs - END */
@@ -616,8 +619,8 @@ enum kfd_ioctl_svm_attr_type {
  * @value: attribute value
  */
 struct kfd_ioctl_svm_attribute {
-	__u32 type;
-	__u32 value;
+	uint32_t type;
+	uint32_t value;
 };
 
 /**
@@ -659,10 +662,10 @@ struct kfd_ioctl_svm_attribute {
  * attribute type to indicate the access for the specified GPU.
  */
 struct kfd_ioctl_svm_args {
-	__u64 start_addr;
-	__u64 size;
-	__u32 op;
-	__u32 nattr;
+	uint64_t start_addr;
+	uint64_t size;
+	uint32_t op;
+	uint32_t nattr;
 	/* Variable length array of attributes */
 	struct kfd_ioctl_svm_attribute attrs[0];
 };
diff --git a/plugins/cuda/Makefile b/plugins/cuda/Makefile
new file mode 100644
index 000000000..2c1944a34
--- /dev/null
+++ b/plugins/cuda/Makefile
@@ -0,0 +1,40 @@
+PLUGIN_NAME := cuda_plugin
+PLUGIN_SOBJ := cuda_plugin.so
+
+DEPS_CUDA := $(PLUGIN_SOBJ)
+
+PLUGIN_INCLUDE  	:= -iquote../../include
+PLUGIN_INCLUDE  	+= -iquote../../criu/include
+PLUGIN_INCLUDE  	+= -iquote../../criu/arch/$(ARCH)/include/
+PLUGIN_INCLUDE  	+= -iquote../../
+
+COMPEL := ../../compel/compel-host
+
+PLUGIN_CFLAGS := -g -Wall -Werror -shared -nostartfiles -fPIC
+
+__nmk_dir ?= ../../scripts/nmk/scripts/
+include $(__nmk_dir)msg.mk
+
+all: $(DEPS_CUDA)
+
+cuda_plugin.so: cuda_plugin.c
+	$(call msg-gen, $@)
+	$(Q) $(CC) $(PLUGIN_CFLAGS) $(DEFINES) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS)
+
+clean:
+	$(call msg-clean, $@)
+	$(Q) $(RM) $(PLUGIN_SOBJ)
+.PHONY: all clean mrproper
+
+mrproper: clean
+
+install:
+	$(Q) mkdir -p $(DESTDIR)$(PLUGINDIR)
+	$(E) "  INSTALL " $(PLUGIN_NAME)
+	$(Q) install -m 755 $(PLUGIN_SOBJ) $(DESTDIR)$(PLUGINDIR)
+.PHONY: install
+
+uninstall:
+	$(E) " UNINSTALL" $(PLUGIN_NAME)
+	$(Q) $(RM) $(DESTDIR)$(PLUGINDIR)/$(PLUGIN_SOBJ)
+.PHONY: uninstall
diff --git a/plugins/cuda/README.md b/plugins/cuda/README.md
new file mode 100644
index 000000000..7b91f6998
--- /dev/null
+++ b/plugins/cuda/README.md
@@ -0,0 +1,59 @@
+Checkpoint and Restore for CUDA applications with CRIU
+======================================================
+
+# Requirements
+The cuda-checkpoint utility should be placed somewhere in your $PATH and an r555
+or higher GPU driver is required for CUDA CRIU integration support.
+
+## cuda-checkpoint
+The cuda-checkpoint utility can be found at:
+https://github.com/NVIDIA/cuda-checkpoint
+
+cuda-checkpoint is a binary utility used to issue checkpointing commands to CUDA
+applications. Updating the cuda-checkpoint utility between driver releases
+should not be necessary as the utility simply exposes some extra driver behavior
+so driver updates are all that's needed to get access to newer features.
+
+# Checkpointing Procedure
+cuda-checkpoint exposes 4 actions used in the checkpointing process: lock,
+checkpoint, restore, unlock.
+
+* lock - Used with the PAUSE_DEVICES hook while a process is still running to
+  quiesce the application into a state where it can be checkpointed
+* checkpoint - Used with the CHECKPOINT_DEVICES hook once a process has been
+  seized/frozen to perform the actual checkpointing operation
+* restore/unlock - Used with the RESUME_DEVICES_LATE hook to restore the CUDA
+  state and release the process back to its running state
+
+These actions are facilitated by a CUDA checkpoint+restore thread that the CUDA
+plugin will re-wake when needed.
+
+# Known Limitations
+* Currently GPU memory contents are brought into main system memory and CRIU
+  then checkpoints that as part of the normal procedure. On systems with many
+  GPUs with high GPU memory usage this can cause memory thrashing. A future
+  CUDA release will add support for dumping the memory contents to files to
+  alleviate this as well as support in the CRIU plugin.
+* There's currently a small race: if a running process calls cuInit() and
+  finishes initializing CUDA after the PAUSE_DEVICES hook has been issued but
+  before the process is frozen for checkpointing, cuda-checkpoint will report
+  that the process is in an illegal state for checkpointing. This should be
+  very rare, and the recommended workaround is to simply attempt the CRIU
+  procedure again.
+* Applications that use NVML will leave some leftover device references as NVML
+  is not currently supported for checkpointing. There will be support for this
+  in later drivers. A possible temporary workaround is to have the
+  {DUMP,RESTORE}_EXT_FILE hook just ignore /dev/nvidiactl and /dev/nvidia{0..N}
+  remaining references for these applications as in most cases NVML is used to
+  get info such as gpu count and some capabilities and these values are never
+  accessed again and unlikely to change.
+* CUDA applications that fork() but don't call exec() but also don't issue any
+  CUDA API calls will have some leftover references to /dev/nvidia* and fail to
+  checkpoint as a result. This can be worked around in a similar fashion to the
+  NVML case where the leftover references can be ignored as CUDA is not fork()
+  safe anyway.
+* Restore currently requires that you restore on a system with similar GPUs and
+  the same GPU count.
+* NVIDIA UVM Managed Memory, MIG (Multi Instance GPU), and MPS (Multi-Process
+  Service) are currently not supported for checkpointing. Future CUDA releases
+  will add support for these.
diff --git a/plugins/cuda/cuda_plugin.c b/plugins/cuda/cuda_plugin.c
new file mode 100644
index 000000000..9ccb04224
--- /dev/null
+++ b/plugins/cuda/cuda_plugin.c
@@ -0,0 +1,631 @@
+#include "criu-log.h"
+#include "plugin.h"
+#include "util.h"
+#include "cr_options.h"
+#include "pid.h"
+#include "proc_parse.h"
+#include "seize.h"
+#include "fault-injection.h"
+
+#include <common/list.h>
+#include <compel/infect.h>
+
+#include <ctype.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+
+/* cuda-checkpoint binary should live in your PATH */
+#define CUDA_CHECKPOINT "cuda-checkpoint"
+
+/* cuda-checkpoint --action flags */
+#define ACTION_LOCK	  "lock"
+#define ACTION_CHECKPOINT "checkpoint"
+#define ACTION_RESTORE	  "restore"
+#define ACTION_UNLOCK	  "unlock"
+
+typedef enum {
+	CUDA_TASK_RUNNING = 0, /* get_task_state_enum(): state text starts with "running" */
+	CUDA_TASK_LOCKED, /* state text starts with "locked" */
+	CUDA_TASK_CHECKPOINTED, /* state text starts with "checkpointed" */
+	CUDA_TASK_UNKNOWN = -1 /* unrecognised state text, or cuda-checkpoint could not report a state */
+} cuda_task_state_t;
+
+#define CUDA_CKPT_BUF_SIZE (128)
+
+#ifdef LOG_PREFIX
+#undef LOG_PREFIX
+#endif
+#define LOG_PREFIX "cuda_plugin: "
+
+/* Disable plugin functionality if cuda-checkpoint is not in $PATH or driver
+ * version doesn't support --action flag
+ */
+bool plugin_disabled = false;
+
+bool plugin_added_to_inventory = false;
+
+struct pid_info {
+	int pid; /* PID that was passed to cuda_plugin_pause_devices() */
+	char checkpointed; /* 0 at creation; set to 1 by cuda_plugin_checkpoint_devices() right before the checkpoint action */
+	cuda_task_state_t initial_task_state; /* CUDA state found at pause time; read back by resume_device() */
+	struct list_head list; /* linkage in the cuda_pids list */
+};
+
+/* Used to track which PID's we've paused CUDA operations on so far so we can
+ * release them after we're done with the DUMP
+ */
+static LIST_HEAD(cuda_pids);
+
+static void dealloc_pid_buffer(struct list_head *pid_buf)
+{
+	struct pid_info *entry, *next;
+
+	/* Entries are unlinked and freed while walking, hence the _safe iterator. */
+	list_for_each_entry_safe(entry, next, pid_buf, list) {
+		list_del(&entry->list);
+		xfree(entry);
+	}
+}
+
+static int add_pid_to_buf(struct list_head *pid_buf, int pid, cuda_task_state_t state)
+{
+	struct pid_info *entry;
+
+	/* Append a not-yet-checkpointed record for @pid; returns -1 if allocation fails. */
+	entry = xmalloc(sizeof(*entry));
+	if (!entry)
+		return -1;
+
+	entry->initial_task_state = state;
+	entry->checkpointed = 0;
+	entry->pid = pid;
+	list_add_tail(&entry->list, pid_buf);
+	return 0;
+}
+
+static int launch_cuda_checkpoint(const char **args, char *buf, int buf_size)
+{
+#define READ  0
+#define WRITE 1
+	int fd[2], buf_off = 0;
+
+	/* Runs args[] in a child process and captures its stdout and stderr in @buf
+	 * (NUL-terminated, truncated to buf_size - 1 bytes). Returns the child's exit
+	 * code, or -1 if it could not be run or did not exit normally. */
+	if (pipe(fd) != 0) {
+		pr_perror("Couldn't create pipes for reading cuda-checkpoint output");
+		return -1;
+	}
+	buf[0] = '\0';
+
+	int child_pid = fork();
+	if (child_pid == -1) {
+		pr_perror("Failed to fork to exec cuda-checkpoint");
+		close(fd[READ]);
+		close(fd[WRITE]);
+		return -1;
+	}
+
+	if (child_pid == 0) { // child
+		if (dup2(fd[WRITE], STDOUT_FILENO) == -1) {
+			pr_perror("unable to clone fd %d->%d", fd[WRITE], STDOUT_FILENO);
+			_exit(EXIT_FAILURE);
+		}
+		if (dup2(fd[WRITE], STDERR_FILENO) == -1) {
+			pr_perror("unable to clone fd %d->%d", fd[WRITE], STDERR_FILENO);
+			_exit(EXIT_FAILURE);
+		}
+		close(fd[READ]);
+
+		close_fds(STDERR_FILENO + 1);
+
+		execvp(args[0], (char **)args);
+
+		/* We can't use pr_error() as log file fd is closed. */
+		fprintf(stderr, "execvp(\"%s\") failed: %s\n", args[0], strerror(errno));
+		_exit(EXIT_FAILURE);
+	}
+
+	close(fd[WRITE]);
+	/* Reserve one byte for the null character. */
+	buf_size--;
+	while (buf_off < buf_size) {
+		int bytes_read = read(fd[READ], buf + buf_off, buf_size - buf_off);
+		if (bytes_read == -1) {
+			pr_perror("Unable to read output of cuda-checkpoint");
+			goto err_close;
+		}
+		if (bytes_read == 0)
+			break;
+		buf_off += bytes_read;
+	}
+	buf[buf_off] = '\0';
+
+	/* Clear out any of the remaining output in the pipe in case the buffer wasn't large enough */
+	while (true) {
+		char scratch[1024];
+		int bytes_read = read(fd[READ], scratch, sizeof(scratch));
+		if (bytes_read == -1) {
+			pr_perror("Unable to read output of cuda-checkpoint");
+			goto err_close;
+		}
+		if (bytes_read == 0)
+			break;
+	}
+	close(fd[READ]);
+
+	int status, exit_code = -1;
+	if (waitpid(child_pid, &status, 0) == -1) {
+		pr_perror("Unable to wait for the cuda-checkpoint process %d", child_pid);
+		goto err;
+	}
+	if (WIFSIGNALED(status)) {
+		int sig = WTERMSIG(status);
+		pr_err("cuda-checkpoint unexpectedly signaled with %d: %s\n", sig, strsignal(sig));
+	} else if (WIFEXITED(status)) {
+		exit_code = WEXITSTATUS(status);
+	} else {
+		pr_err("cuda-checkpoint exited improperly: %d\n", status);
+	}
+
+	if (exit_code != EXIT_SUCCESS)
+		pr_debug("cuda-checkpoint output ===>\n%s\n"
+			 "<=== cuda-checkpoint output\n",
+			 buf);
+
+	return exit_code;
+err_close:
+	close(fd[READ]); /* read failures get here with the read end still open */
+err:
+	kill(child_pid, SIGKILL);
+	waitpid(child_pid, NULL, 0);
+	return -1;
+}
+
+/**
+ * Tells whether the cuda-checkpoint utility advertises a given flag, by
+ * searching for it in the text printed by "cuda-checkpoint -h".
+ *
+ * Returns:
+ *  1 if the flag is mentioned,
+ *  0 if it is not,
+ *  -1 if cuda-checkpoint could not be launched or did not exit successfully.
+ */
+static int cuda_checkpoint_supports_flag(const char *flag)
+{
+	const char *help_cmd[] = { CUDA_CHECKPOINT, "-h", NULL };
+	char help_text[2048];
+	int rc;
+
+	rc = launch_cuda_checkpoint(help_cmd, help_text, sizeof(help_text));
+	if (rc != 0)
+		return -1;
+
+	return strstr(help_text, flag) ? 1 : 0;
+}
+
+/* Ask cuda-checkpoint for the TID of the CUDA restore thread of @root_pid.
+ * Returns atoi() of the tool's output, or -1 when the tool fails. */
+static int get_cuda_restore_tid(int root_pid)
+{
+	char tid_text[CUDA_CKPT_BUF_SIZE];
+	char pid_arg[16];
+	const char *cmd[] = { CUDA_CHECKPOINT, "--get-restore-tid", "--pid", pid_arg, NULL };
+
+	snprintf(pid_arg, sizeof(pid_arg), "%d", root_pid);
+
+	if (launch_cuda_checkpoint(cmd, tid_text, sizeof(tid_text)) != 0) {
+		pr_err("Failed to launch cuda-checkpoint to retrieve restore tid: %s\n", tid_text);
+		return -1;
+	}
+
+	return atoi(tid_text);
+}
+
+static cuda_task_state_t get_task_state_enum(const char *state_str)
+{
+	/* Map the state text reported by cuda-checkpoint to the enum. Only the
+	 * leading characters are compared; whatever follows the name is ignored. */
+	if (!strncmp(state_str, "running", sizeof("running") - 1))
+		return CUDA_TASK_RUNNING;
+	if (!strncmp(state_str, "locked", sizeof("locked") - 1))
+		return CUDA_TASK_LOCKED;
+	if (!strncmp(state_str, "checkpointed", sizeof("checkpointed") - 1))
+		return CUDA_TASK_CHECKPOINTED;
+
+	pr_err("Unknown CUDA state: %s\n", state_str);
+	return CUDA_TASK_UNKNOWN;
+}
+
+static cuda_task_state_t get_cuda_state(pid_t pid)
+{
+	char output[CUDA_CKPT_BUF_SIZE];
+	char pid_arg[16];
+	const char *cmd[] = { CUDA_CHECKPOINT, "--get-state", "--pid", pid_arg, NULL };
+
+	/* Query the CUDA state of @pid; CUDA_TASK_UNKNOWN if it cannot be determined. */
+	snprintf(pid_arg, sizeof(pid_arg), "%d", pid);
+	if (launch_cuda_checkpoint(cmd, output, sizeof(output)) != 0) {
+		pr_err("Failed to launch cuda-checkpoint to retrieve state: %s\n", output);
+		return CUDA_TASK_UNKNOWN;
+	}
+
+	return get_task_state_enum(output);
+}
+
+static int cuda_process_checkpoint_action(int pid, const char *action, unsigned int timeout, char *msg_buf,
+					  int buf_size)
+{
+	char pid_buf[16], timeout_buf[16];
+
+	/* Runs "cuda-checkpoint --action <action> --pid <pid> [--timeout <timeout>]"; a zero
+	 * @timeout omits the flag. Returns whatever launch_cuda_checkpoint() returns. */
+	snprintf(pid_buf, sizeof(pid_buf), "%d", pid);
+	const char *args[] = { CUDA_CHECKPOINT, "--action", action, "--pid", pid_buf, NULL /* --timeout */,
+			       NULL /* timeout_val */, NULL };
+	if (timeout > 0) {
+		snprintf(timeout_buf, sizeof(timeout_buf), "%u", timeout); /* unsigned: %u, not %d */
+		args[5] = "--timeout";
+		args[6] = timeout_buf;
+	}
+
+	return launch_cuda_checkpoint(args, msg_buf, buf_size);
+}
+
+static int interrupt_restore_thread(int restore_tid, k_rtsigset_t *restore_sigset)
+{
+	/* Re-stop the restore thread that resume_restore_thread() let run. The task
+	 * is already SEIZE'd, so a plain PTRACE_INTERRUPT (not compel_interrupt_task())
+	 * is enough; then wait for the stop, set the ptrace options again and put back
+	 * the signal mask from @restore_sigset. Returns 0 on success, -1 on error. */
+	if (ptrace(PTRACE_INTERRUPT, restore_tid, NULL, 0)) {
+		pr_perror("Could not interrupt cuda restore tid %d after checkpoint, process may be in strange state",
+			  restore_tid);
+		return -1;
+	}
+
+	struct proc_status_creds creds;
+	if (compel_wait_task(restore_tid, -1, parse_pid_status, NULL, &creds.s, NULL) != COMPEL_TASK_ALIVE) {
+		pr_err("compel_wait_task failed after interrupt\n");
+		return -1;
+	}
+
+	if (ptrace(PTRACE_SETOPTIONS, restore_tid, NULL, PTRACE_O_SUSPEND_SECCOMP | PTRACE_O_TRACESYSGOOD)) {
+		pr_perror("Failed to set ptrace options on interrupt for restore tid %d", restore_tid);
+		return -1;
+	}
+
+	if (ptrace(PTRACE_SETSIGMASK, restore_tid, sizeof(*restore_sigset), restore_sigset)) {
+		pr_perror("Unable to restore original sigmask to restore tid %d", restore_tid);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int resume_restore_thread(int restore_tid, k_rtsigset_t *save_sigset)
+{
+	k_rtsigset_t block; /* mask the thread runs with: every signal except SIGTRAP */
+
+	if (ptrace(PTRACE_GETSIGMASK, restore_tid, sizeof(*save_sigset), save_sigset)) { /* kept for the caller */
+		pr_perror("Failed to get current sigmask for restore tid %d", restore_tid);
+		return -1;
+	}
+
+	ksigfillset(&block);
+	ksigdelset(&block, SIGTRAP);
+
+	if (ptrace(PTRACE_SETSIGMASK, restore_tid, sizeof(block), &block)) {
+		pr_perror("Failed to block signals on restore tid %d", restore_tid);
+		return -1;
+	}
+
+	// Options are set to 0, which clears PTRACE_O_SUSPEND_SECCOMP while the restore thread runs
+	if (ptrace(PTRACE_SETOPTIONS, restore_tid, NULL, 0)) {
+		pr_perror("Could not clear ptrace options on restore tid %d", restore_tid);
+		return -1;
+	}
+
+	if (ptrace(PTRACE_CONT, restore_tid, NULL, 0)) { /* finally let the thread run */
+		pr_perror("Could not resume cuda restore tid %d", restore_tid);
+		return -1;
+	}
+
+	return 0; /* every -1 above means one of the ptrace requests failed */
+}
+
+int cuda_plugin_checkpoint_devices(int pid)
+{
+	struct pid_info *cursor, *tracked = NULL;
+	char msg_buf[CUDA_CKPT_BUF_SIZE];
+	k_rtsigset_t saved_mask;
+	int restore_tid, action_ret, stop_ret;
+
+	/* CHECKPOINT_DEVICES hook: issues the cuda-checkpoint "checkpoint" action for
+	 * @pid. Returns 0 when done or when there is nothing to do, -ENOTSUP if the
+	 * plugin is disabled, and -1 on failure.
+	 */
+	if (plugin_disabled)
+		return -ENOTSUP;
+
+	restore_tid = get_cuda_restore_tid(pid);
+	if (restore_tid == -1) {
+		/* This is also where we end up on a race with cuInit(): the process was
+		 * locked before cuInit() completed, so cuda-checkpoint reports that it
+		 * is in an invalid state to checkpoint.
+		 */
+		pr_info("No need to checkpoint devices on pid %d\n", pid);
+		return 0;
+	}
+
+	/* Look up the record cuda_plugin_pause_devices() created for this pid */
+	list_for_each_entry(cursor, &cuda_pids, list) {
+		if (cursor->pid != pid)
+			continue;
+		tracked = cursor;
+		break;
+	}
+
+	if (!tracked) {
+		/* We return an error here. The task should be restored
+		 * to its original state at cuda_plugin_fini().
+		 */
+		pr_err("Failed to track pid %d\n", pid);
+		return -1;
+	}
+
+	if (tracked->initial_task_state == CUDA_TASK_CHECKPOINTED) {
+		pr_info("pid %d already in a checkpointed state\n", pid);
+		return 0;
+	}
+
+	pr_info("Checkpointing CUDA devices on pid %d restore_tid %d\n", pid, restore_tid);
+	/* We need to resume the checkpoint thread to prepare the mappings for
+	 * checkpointing
+	 */
+	if (resume_restore_thread(restore_tid, &saved_mask))
+		return -1;
+
+	/* Flagged before the action runs, so a failed attempt is flagged as well */
+	tracked->checkpointed = 1;
+	action_ret = cuda_process_checkpoint_action(pid, ACTION_CHECKPOINT, 0, msg_buf, sizeof(msg_buf));
+	if (action_ret)
+		pr_err("CHECKPOINT_DEVICES failed with %s\n", msg_buf);
+
+	/* The restore thread is stopped again even if the action failed */
+	stop_ret = interrupt_restore_thread(restore_tid, &saved_mask);
+
+	return action_ret ? -1 : stop_ret;
+}
+CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__CHECKPOINT_DEVICES, cuda_plugin_checkpoint_devices);
+
+int cuda_plugin_pause_devices(int pid)
+{
+	int restore_tid;
+	char msg_buf[CUDA_CKPT_BUF_SIZE];
+	cuda_task_state_t task_state;
+
+	/* PAUSE_DEVICES hook: lock CUDA on @pid and record the state it was found in.
+	 * Returns 0 (also when there is nothing to pause), -ENOTSUP when the plugin is
+	 * disabled, or -1 on failure. */
+	if (plugin_disabled)
+		return -ENOTSUP;
+
+	restore_tid = get_cuda_restore_tid(pid);
+	if (restore_tid == -1) {
+		pr_info("no need to pause devices on pid %d\n", pid);
+		return 0;
+	}
+
+	task_state = get_cuda_state(restore_tid);
+	if (task_state == CUDA_TASK_UNKNOWN) {
+		pr_err("Failed to get CUDA state for PID %d\n", restore_tid);
+		return -1;
+	}
+
+	if (!plugin_added_to_inventory) {
+		if (add_inventory_plugin(CR_PLUGIN_DESC.name)) {
+			pr_err("Failed to add CUDA plugin to inventory image\n");
+			return -1;
+		}
+		plugin_added_to_inventory = true;
+	}
+
+	if (task_state == CUDA_TASK_LOCKED || task_state == CUDA_TASK_CHECKPOINTED) {
+		/* Nothing to lock. Remember the state the PID was found in:
+		 * cuda_plugin_checkpoint_devices() skips a "checkpointed" PID and
+		 * resume_device() leaves it in that "locked"/"checkpointed" state.
+		 */
+		if (task_state == CUDA_TASK_LOCKED)
+			pr_info("pid %d already in a locked state\n", pid);
+		if (add_pid_to_buf(&cuda_pids, pid, task_state)) {
+			pr_err("unable to track paused pid %d\n", pid);
+			return -1;
+		}
+		return 0;
+	}
+
+	pr_info("pausing devices on pid %d\n", pid);
+	int status = cuda_process_checkpoint_action(pid, ACTION_LOCK, opts.timeout * 1000, msg_buf, sizeof(msg_buf));
+	if (status) {
+		pr_err("PAUSE_DEVICES failed with %s\n", msg_buf);
+		if (alarm_timeouted())
+			goto unlock;
+		return -1;
+	}
+
+	if (add_pid_to_buf(&cuda_pids, pid, CUDA_TASK_RUNNING)) {
+		pr_err("unable to track paused pid %d\n", pid);
+		goto unlock;
+	}
+
+	return 0;
+unlock:
+	status = cuda_process_checkpoint_action(pid, ACTION_UNLOCK, 0, msg_buf, sizeof(msg_buf));
+	if (status)
+		pr_err("Failed to unlock process status %s, pid %d may hang\n", msg_buf, pid);
+	return -1;
+}
+CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__PAUSE_DEVICES, cuda_plugin_pause_devices)
+
+int resume_device(int pid, int checkpointed, cuda_task_state_t initial_task_state)
+{
+	bool was_running = initial_task_state == CUDA_TASK_RUNNING;
+	bool was_locked = initial_task_state == CUDA_TASK_LOCKED;
+	char msg_buf[CUDA_CKPT_BUF_SIZE];
+	k_rtsigset_t saved_mask;
+	int restore_tid, stop_ret, ret = 0;
+
+	/* Bring CUDA on @pid back towards @initial_task_state: issue "restore" if
+	 * it was checkpointed from a running/locked state, then "unlock" if it was
+	 * running. Returns 0 on success or when there is nothing to do, and -1 on
+	 * failure.
+	 */
+	if (initial_task_state == CUDA_TASK_UNKNOWN) {
+		pr_info("skip resume for PID %d (unknown state)\n", pid);
+		return 0;
+	}
+
+	restore_tid = get_cuda_restore_tid(pid);
+	if (restore_tid == -1) {
+		pr_info("No need to resume devices on pid %d\n", pid);
+		return 0;
+	}
+
+	pr_info("resuming devices on pid %d\n", pid);
+	/* The resuming process has to stay frozen during this time otherwise
+	 * attempting to access a UVM pointer will crash if we haven't restored the
+	 * underlying mappings yet
+	 */
+	pr_debug("Restore thread pid %d found for real pid %d\n", restore_tid, pid);
+	/* wakeup the restore thread so we can handle the restore for this pid,
+	 * rseq_cs has to be restored before execution
+	 */
+	if (resume_restore_thread(restore_tid, &saved_mask))
+		return -1;
+
+	/* If the process was "locked" or "running" before checkpointing it, we need to restore it */
+	if (checkpointed && (was_running || was_locked) &&
+	    cuda_process_checkpoint_action(pid, ACTION_RESTORE, 0, msg_buf, sizeof(msg_buf))) {
+		pr_err("RESUME_DEVICES RESTORE failed with %s\n", msg_buf);
+		ret = -1;
+		goto interrupt;
+	}
+
+	/* If the process was "running" before we paused it, we need to unlock it */
+	if (was_running && cuda_process_checkpoint_action(pid, ACTION_UNLOCK, 0, msg_buf, sizeof(msg_buf))) {
+		pr_err("RESUME_DEVICES UNLOCK failed with %s\n", msg_buf);
+		ret = -1;
+	}
+
+interrupt:
+	/* Reached on the failure paths above too: the restore thread is stopped again in every case */
+	stop_ret = interrupt_restore_thread(restore_tid, &saved_mask);
+
+	return ret ? ret : stop_ret;
+}
+
+int cuda_plugin_resume_devices_late(int pid)
+{
+	/* RESUME_DEVICES_LATE is the hook used during `criu restore`. The task is
+	 * resumed as if it had been checkpointed from a "running" state, on the
+	 * assumption that users expect a running process after restore even if it
+	 * was in a "locked" or "checkpointed" state during `criu dump`.
+	 */
+	const int checkpointed = 1;
+
+	if (plugin_disabled)
+		return -ENOTSUP;
+	return resume_device(pid, checkpointed, CUDA_TASK_RUNNING);
+}
+CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, cuda_plugin_resume_devices_late)
+
+/**
+ * Reports whether /proc/driver/nvidia/gpus/ exists and is a directory, which
+ * is what this plugin takes as the sign that a CUDA device is present.
+ */
+static bool is_cuda_device_available(void)
+{
+	static const char gpus_dir[] = "/proc/driver/nvidia/gpus/";
+	struct stat st;
+
+	if (stat(gpus_dir, &st) == 0 && S_ISDIR(st.st_mode))
+		return true;
+	return false;
+}
+
+int cuda_plugin_init(int stage)
+{
+	int supported;
+
+	/* Plugin init callback. It always returns 0: whenever CUDA checkpointing
+	 * cannot or should not be used, the plugin marks itself disabled instead of
+	 * failing, and its hooks then return -ENOTSUP.
+	 */
+
+	/* Disable CUDA checkpointing with pre-dump */
+	if (stage == CR_PLUGIN_STAGE__PRE_DUMP)
+		goto disable;
+
+	/* On restore, disable the plugin when the inventory check reports false */
+	if (stage == CR_PLUGIN_STAGE__RESTORE &&
+	    !check_and_remove_inventory_plugin(CR_PLUGIN_DESC.name, strlen(CR_PLUGIN_DESC.name)))
+		goto disable;
+
+	if (!fault_injected(FI_PLUGIN_CUDA_FORCE_ENABLE) && !is_cuda_device_available()) {
+		pr_info("No GPU device found; CUDA plugin is disabled\n");
+		goto disable;
+	}
+
+	supported = cuda_checkpoint_supports_flag("--action");
+	if (supported == -1) {
+		pr_warn("check that %s is present in $PATH\n", CUDA_CHECKPOINT);
+		goto disable;
+	}
+	if (supported == 0) {
+		pr_warn("cuda-checkpoint --action flag not supported, an r555 or higher version driver is required. Disabling CUDA plugin\n");
+		goto disable;
+	}
+
+	pr_info("initialized: %s stage %d\n", CR_PLUGIN_DESC.name, stage);
+
+	/* In the DUMP stage track all the PID's we've paused CUDA operations on to
+	 * release them when we're done if the user requested the leave-running option
+	 */
+	if (stage == CR_PLUGIN_STAGE__DUMP)
+		INIT_LIST_HEAD(&cuda_pids);
+
+	set_compel_interrupt_only_mode();
+
+	return 0;
+
+disable:
+	plugin_disabled = true;
+	return 0;
+}
+
+void cuda_plugin_fini(int stage, int ret)
+{
+	struct pid_info *tracked;
+
+	if (plugin_disabled)
+		return;
+
+	pr_info("finished %s stage %d err %d\n", CR_PLUGIN_DESC.name, stage, ret);
+
+	/* The PID list is only resumed and released at the end of the DUMP stage */
+	if (stage != CR_PLUGIN_STAGE__DUMP)
+		return;
+	/* Release all the paused PID's in case the user provides the -R
+	 * (leave-running) flag or an error occurred; resume failures are ignored
+	 */
+	if (opts.final_state == TASK_ALIVE || ret != 0) {
+		list_for_each_entry(tracked, &cuda_pids, list)
+			resume_device(tracked->pid, tracked->checkpointed, tracked->initial_task_state);
+	}
+	dealloc_pid_buffer(&cuda_pids);
+}
+CR_PLUGIN_REGISTER("cuda_plugin", cuda_plugin_init, cuda_plugin_fini)
diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine
index cab72e8a1..ed883f300 100644
--- a/scripts/build/Dockerfile.alpine
+++ b/scripts/build/Dockerfile.alpine
@@ -1,51 +1,14 @@
 FROM alpine
 ARG CC=gcc
 
-RUN apk update && apk add \
-	$CC \
-	bash \
-	build-base \
-	coreutils \
-	procps \
-	git \
-	gnutls-dev \
-	libaio-dev \
-	libcap-dev \
-	libnet-dev \
-	libnl3-dev \
-	nftables \
-	nftables-dev \
-	pkgconfig \
-	protobuf-c-dev \
-	protobuf-dev \
-	py3-pip \
-	py3-protobuf \
-	python3 \
-	sudo
-
 COPY . /criu
 WORKDIR /criu
-RUN make mrproper && date && make -j $(nproc) CC="$CC" && date
 
-RUN apk add \
-	ip6tables \
-	iptables \
-	nftables \
-	iproute2 \
-	tar \
-	bash \
-	go \
-	e2fsprogs \
-	py-yaml \
-	py3-flake8 \
-	asciidoctor
+RUN apk add --no-cache "$CC" && /criu/contrib/dependencies/apk-packages.sh
+
+RUN make mrproper && date && make -j $(nproc) CC="$CC" && date
 
 # The rpc test cases are running as user #1000, let's add the user
 RUN adduser -u 1000 -D test
 
-RUN pip3 install junit_xml
-
-# For zdtm we need an unversioned python binary
-RUN ln -s /usr/bin/python3 /usr/bin/python
-
 RUN make -C test/zdtm
diff --git a/scripts/build/Dockerfile.amd-rocm b/scripts/build/Dockerfile.amd-rocm
index c0d181b03..ed66ae4fe 100644
--- a/scripts/build/Dockerfile.amd-rocm
+++ b/scripts/build/Dockerfile.amd-rocm
@@ -55,8 +55,8 @@ RUN apt-get clean -qqy && apt-get update -qqy && apt-get install -qqy --no-insta
 	protobuf-compiler \
 	python-protobuf \
 	python3-minimal \
-	python3-future \
 	python-ipaddress \
+	uuid-dev \
 	curl \
 	wget \
 	vim \
diff --git a/scripts/build/Dockerfile.archlinux b/scripts/build/Dockerfile.archlinux
index d226244ee..261bd2d79 100644
--- a/scripts/build/Dockerfile.archlinux
+++ b/scripts/build/Dockerfile.archlinux
@@ -2,39 +2,14 @@ FROM docker.io/library/archlinux:latest
 
 ARG CC=gcc
 
-RUN pacman -Syu --noconfirm \
-	$CC \
-	bash \
-	make \
-	coreutils \
-	git \
-	gnutls \
-	libaio \
-	libcap \
-	libnet \
-	libnl \
-	nftables \
-	pkgconfig \
-	protobuf-c \
-	protobuf \
-	python-pip \
-	python-protobuf \
-	which \
-	sudo \
-	iptables \
-	nftables \
-	iproute2 \
-	tar \
-	bash \
-	go \
-	python-yaml \
-	flake8 \
-	asciidoctor \
-	python-junit-xml \
-	diffutils
+# Initialize machine ID
+RUN systemd-machine-id-setup
 
 COPY . /criu
 WORKDIR /criu
+
+RUN pacman -Syu --noconfirm "$CC" && contrib/dependencies/pacman-packages.sh
+
 RUN make mrproper && date && make -j $(nproc) CC="$CC" && date
 
 # The rpc test cases are running as user #1000, let's add the user
diff --git a/scripts/build/Dockerfile.centos7 b/scripts/build/Dockerfile.centos7
deleted file mode 100644
index 21e70ff0e..000000000
--- a/scripts/build/Dockerfile.centos7
+++ /dev/null
@@ -1,45 +0,0 @@
-FROM centos:7
-
-ARG CC=gcc
-
-RUN yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
-RUN yum install -y \
-	findutils \
-	gcc \
-	git \
-	gnutls-devel \
-	iproute \
-	iptables \
-	libaio-devel \
-	libasan \
-	libcap-devel \
-	libnet-devel \
-	libnl3-devel \
-	make \
-	procps-ng \
-	protobuf-c-devel \
-	protobuf-devel \
-	protobuf-python \
-	python \
-	python-flake8 \
-	python-ipaddress \
-	python2-future \
-	python2-junit_xml \
-	python-yaml \
-	python-six \
-	sudo \
-	tar \
-	which \
-	e2fsprogs \
-	python2-pip \
-	rubygem-asciidoctor
-
-COPY . /criu
-WORKDIR /criu
-
-RUN make mrproper && date && make -j $(nproc) CC="$CC" && date
-
-# The rpc test cases are running as user #1000, let's add the user
-RUN adduser -u 1000 test
-
-RUN make -C test/zdtm -j $(nproc)
diff --git a/scripts/build/Dockerfile.centos8 b/scripts/build/Dockerfile.centos8
deleted file mode 100644
index 488f95d65..000000000
--- a/scripts/build/Dockerfile.centos8
+++ /dev/null
@@ -1,52 +0,0 @@
-FROM registry.centos.org/centos/centos:8
-
-ARG CC=gcc
-
-RUN yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm dnf-plugins-core
-RUN yum config-manager --set-enabled powertools
-RUN yum install -y --allowerasing \
-	asciidoc \
-	coreutils \
-	chkconfig \
-	diffutils \
-	findutils \
-	gcc \
-	git \
-	gnutls-devel \
-	iproute \
-	iptables \
-	libaio-devel \
-	libasan \
-	libcap-devel \
-	libnet-devel \
-	libnl3-devel \
-	libselinux-devel \
-	make \
-	procps-ng \
-	protobuf-c-devel \
-	protobuf-devel \
-	python3-devel \
-	python3-flake8 \
-	python3-PyYAML \
-	python3-future \
-	python3-protobuf \
-	python3-pip \
-	sudo \
-	tar \
-	which \
-	xmlto
-
-RUN alternatives --set python /usr/bin/python3
-ENV PYTHON=python3
-
-COPY . /criu
-WORKDIR /criu
-
-RUN make mrproper && date && make -j $(nproc) CC="$CC" && date
-
-# The rpc test cases are running as user #1000, let's add the user
-RUN adduser -u 1000 test
-
-RUN pip3 install junit_xml
-
-RUN make -C test/zdtm -j $(nproc)
diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl
index 9d3bb0f87..c26a5fd57 100644
--- a/scripts/build/Dockerfile.fedora.tmpl
+++ b/scripts/build/Dockerfile.fedora.tmpl
@@ -1,11 +1,10 @@
 ARG CC=gcc
 
-COPY scripts/ci/prepare-for-fedora-rawhide.sh /bin/prepare-for-fedora-rawhide.sh
-RUN /bin/prepare-for-fedora-rawhide.sh
-
 COPY . /criu
 WORKDIR /criu
 
+RUN dnf install -y "$CC" && scripts/ci/prepare-for-fedora-rawhide.sh
+
 RUN make mrproper && date && make -j $(nproc) CC="$CC" && date
 
 # The rpc test cases are running as user #1000, let's add the user
diff --git a/scripts/build/Dockerfile.hotspot-alpine b/scripts/build/Dockerfile.hotspot-alpine
new file mode 100644
index 000000000..cd632dddf
--- /dev/null
+++ b/scripts/build/Dockerfile.hotspot-alpine
@@ -0,0 +1,11 @@
+FROM docker.io/library/eclipse-temurin:11-alpine
+ARG CC=gcc
+
+COPY . /criu
+WORKDIR /criu
+
+RUN apk add --no-cache maven "$CC" && contrib/dependencies/apk-packages.sh
+
+RUN make mrproper && make -j $(nproc) CC="$CC"
+
+ENTRYPOINT ["mvn", "-q", "-f", "test/javaTests/pom.xml", "test"]
diff --git a/scripts/build/Dockerfile.hotspot-ubuntu b/scripts/build/Dockerfile.hotspot-ubuntu
new file mode 100644
index 000000000..a459e1ec7
--- /dev/null
+++ b/scripts/build/Dockerfile.hotspot-ubuntu
@@ -0,0 +1,11 @@
+FROM docker.io/library/eclipse-temurin:11-jammy
+ARG CC=gcc
+
+COPY . /criu
+WORKDIR /criu
+
+RUN contrib/apt-install maven "$CC" && contrib/dependencies/apt-packages.sh
+
+RUN make mrproper && make -j $(nproc) CC="$CC"
+
+ENTRYPOINT ["mvn", "-q", "-f", "test/javaTests/pom.xml", "test"]
diff --git a/scripts/build/Dockerfile.linux32.tmpl b/scripts/build/Dockerfile.linux32.tmpl
index a15038631..a37f16e49 100644
--- a/scripts/build/Dockerfile.linux32.tmpl
+++ b/scripts/build/Dockerfile.linux32.tmpl
@@ -1,32 +1,10 @@
 ARG CC=gcc
 
-COPY scripts/ci/apt-install /bin/apt-install
-
-RUN apt-install \
-	libnet-dev \
-	libnl-route-3-dev \
-	$CC \
-	bsdmainutils \
-	build-essential \
-	git-core \
-	iptables \
-	libaio-dev \
-	libcap-dev \
-	libgnutls28-dev \
-	libgnutls30 \
-	libnl-3-dev \
-	libprotobuf-c-dev \
-	libprotobuf-dev \
-	libselinux-dev \
-	pkg-config \
-	protobuf-c-compiler \
-	protobuf-compiler \
-	python3-minimal \
-	python3-future
-
 COPY . /criu
 WORKDIR /criu
 
+RUN contrib/apt-install "$CC" && contrib/dependencies/apt-packages.sh
+
 RUN uname -m && setarch linux32 uname -m && setarch --list
 
 RUN make mrproper && date && \
diff --git a/scripts/build/Dockerfile.openj9-alpine b/scripts/build/Dockerfile.openj9-alpine
deleted file mode 100644
index f92011283..000000000
--- a/scripts/build/Dockerfile.openj9-alpine
+++ /dev/null
@@ -1,32 +0,0 @@
-# FIXME: Replace with eclipse-temurin once Alpine support has been added.
-# https://github.com/adoptium/containers/pull/60
-FROM adoptopenjdk/openjdk8-openj9:alpine
-ARG CC=gcc
-
-RUN apk update && apk add \
-	bash \
-	build-base \
-	coreutils \
-	git \
-	gnutls-dev \
-	libaio-dev \
-	libcap-dev \
-	libnet-dev \
-	libnl3-dev \
-	pkgconfig \
-	protobuf-c-dev \
-	protobuf-dev \
-	python3 \
-	sudo \
-	maven \
-	ip6tables \
-	iptables \
-	bash
-
-COPY . /criu
-WORKDIR /criu
-
-RUN make mrproper && make -j $(nproc) CC="$CC"
-
-ENTRYPOINT mvn -q -f test/javaTests/pom.xml test
-
diff --git a/scripts/build/Dockerfile.openj9-ubuntu b/scripts/build/Dockerfile.openj9-ubuntu
index 8936adf81..18664f100 100644
--- a/scripts/build/Dockerfile.openj9-ubuntu
+++ b/scripts/build/Dockerfile.openj9-ubuntu
@@ -1,34 +1,12 @@
-FROM docker.io/library/eclipse-temurin:8-focal
+FROM docker.io/library/ibm-semeru-runtimes:open-11-jdk-jammy
 ARG CC=gcc
 
-COPY scripts/ci/apt-install /bin/apt-install
-
-RUN apt-install protobuf-c-compiler \
-	libprotobuf-c-dev \
-	libaio-dev \
-	python3-future \
-	libprotobuf-dev \
-	protobuf-compiler \
-	libcap-dev \
-	libnl-3-dev \
-	gdb \
-	bash \
-	python3-protobuf \
-	python3-yaml \
-	libnet-dev \
-	libnl-route-3-dev \
-	libbsd-dev \
-	make \
-	git \
-	pkg-config \
-	iptables \
-	gcc \
-	maven
-
+RUN mkdir -p /etc/criu && echo 'ghost-limit 16777216' > /etc/criu/default.conf
 COPY . /criu
 WORKDIR /criu
 
+RUN contrib/apt-install maven "$CC" && contrib/dependencies/apt-packages.sh
+
 RUN make mrproper && make -j $(nproc) CC="$CC"
 
-ENTRYPOINT mvn -q -f test/javaTests/pom.xml test
-
+ENTRYPOINT ["mvn", "-f", "test/javaTests/pom.xml", "test"]
diff --git a/scripts/build/Dockerfile.riscv64-stable-cross.hdr b/scripts/build/Dockerfile.riscv64-stable-cross.hdr
new file mode 100644
index 000000000..d4c414023
--- /dev/null
+++ b/scripts/build/Dockerfile.riscv64-stable-cross.hdr
@@ -0,0 +1,5 @@
+FROM ubuntu:jammy
+
+ENV ARCH=riscv64
+ENV DEBIAN_ARCH=riscv64
+ENV CROSS_TRIPLET=riscv64-linux-gnu
diff --git a/scripts/build/Dockerfile.riscv64-stable-cross.tmpl b/scripts/build/Dockerfile.riscv64-stable-cross.tmpl
new file mode 100644
index 000000000..8933a6c82
--- /dev/null
+++ b/scripts/build/Dockerfile.riscv64-stable-cross.tmpl
@@ -0,0 +1,31 @@
+# Add the cross compiler sources
+RUN apt-get clean -y && apt-get update -y && apt-get install -y --no-install-recommends gnupg2
+
+RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 871920D1991BC93C 8D69674688B6CB36 B523E5F3FC4E5F2C
+
+COPY scripts/ci/riscv64-cross/amd64-sources.list /etc/apt/sources.list
+
+COPY scripts/ci/riscv64-cross/riscv64-sources.list /etc/apt/sources.list.d/
+
+RUN dpkg --add-architecture ${DEBIAN_ARCH} && \
+	apt-get update -y
+
+ENV CROSS_COMPILE=${CROSS_TRIPLET}-				\
+	CROSS_ROOT=/usr/${CROSS_TRIPLET}			\
+	AS=/usr/bin/${CROSS_TRIPLET}-as				\
+	AR=/usr/bin/${CROSS_TRIPLET}-ar				\
+	CC=/usr/bin/${CROSS_TRIPLET}-gcc			\
+	CPP=/usr/bin/${CROSS_TRIPLET}-cpp			\
+	CXX=/usr/bin/${CROSS_TRIPLET}-g++			\
+	LD=/usr/bin/${CROSS_TRIPLET}-ld				\
+	FC=/usr/bin/${CROSS_TRIPLET}-gfortran
+
+ENV PATH="${PATH}:${CROSS_ROOT}/bin"				\
+	PKG_CONFIG_PATH=/usr/lib/${CROSS_TRIPLET}/pkgconfig
+
+COPY . /criu
+WORKDIR /criu
+
+RUN contrib/dependencies/apt-cross-packages.sh
+
+RUN make mrproper && date && make -j $(nproc) zdtm && date
diff --git a/scripts/build/Dockerfile.stable-cross.tmpl b/scripts/build/Dockerfile.stable-cross.tmpl
index 6a68cd1ca..56104081f 100644
--- a/scripts/build/Dockerfile.stable-cross.tmpl
+++ b/scripts/build/Dockerfile.stable-cross.tmpl
@@ -1,28 +1,7 @@
-COPY scripts/ci/apt-install /bin/apt-install
-
 # Add the cross compiler sources
 RUN echo "deb http://deb.debian.org/debian/ stable main" >> /etc/apt/sources.list && \
   dpkg --add-architecture ${DEBIAN_ARCH}
 
-RUN apt-install \
-	crossbuild-essential-${DEBIAN_ARCH}	\
-	libc6-dev-${DEBIAN_ARCH}-cross		\
-	libc6-${DEBIAN_ARCH}-cross		\
-	libbz2-dev:${DEBIAN_ARCH}		\
-	libexpat1-dev:${DEBIAN_ARCH}		\
-	ncurses-dev:${DEBIAN_ARCH}		\
-	libssl-dev:${DEBIAN_ARCH}		\
-	protobuf-c-compiler			\
-	protobuf-compiler			\
-	python3-protobuf			\
-	libnl-3-dev:${DEBIAN_ARCH}		\
-	libprotobuf-dev:${DEBIAN_ARCH}		\
-	libnet-dev:${DEBIAN_ARCH}		\
-	libprotobuf-c-dev:${DEBIAN_ARCH}	\
-	libcap-dev:${DEBIAN_ARCH}		\
-	libaio-dev:${DEBIAN_ARCH}		\
-	libnl-route-3-dev:${DEBIAN_ARCH}
-
 ENV CROSS_COMPILE=${CROSS_TRIPLET}-				\
 	CROSS_ROOT=/usr/${CROSS_TRIPLET}			\
 	AS=/usr/bin/${CROSS_TRIPLET}-as				\
@@ -39,4 +18,12 @@ ENV PATH="${PATH}:${CROSS_ROOT}/bin"				\
 COPY . /criu
 WORKDIR /criu
 
-RUN	make mrproper && date && make -j $(nproc) zdtm && date
+RUN contrib/dependencies/apt-cross-packages.sh
+
+# amdgpu_plugin is not supported on armv7
+RUN	make mrproper && date && \
+	make -j $(nproc) && \
+	if [ "$SUBARCH" != "armv7" ]; then \
+		make -j $(nproc) amdgpu_plugin; \
+	fi && \
+	make -j $(nproc) zdtm && date
diff --git a/scripts/build/Dockerfile.tmpl b/scripts/build/Dockerfile.tmpl
index e0e72372d..498b99be9 100644
--- a/scripts/build/Dockerfile.tmpl
+++ b/scripts/build/Dockerfile.tmpl
@@ -1,41 +1,12 @@
 ARG CC=gcc
 
-COPY scripts/ci/apt-install /bin/apt-install
-
-# On Ubuntu, kernel modules such as ip_tables and xt_mark may not be loaded by default
-# We need to install kmod to enable iptables to load these modules for us.
-RUN apt-install \
-	libnet-dev \
-	libnl-route-3-dev \
-	$CC \
-	bsdmainutils \
-	build-essential \
-	git-core \
-	iptables \
-	libaio-dev \
-	libbsd-dev \
-	libcap-dev \
-	libgnutls28-dev \
-	libgnutls30 \
-	libnftables-dev \
-	libnl-3-dev \
-	libprotobuf-c-dev \
-	libprotobuf-dev \
-	libselinux-dev \
-	iproute2 \
-	kmod \
-	pkg-config \
-	protobuf-c-compiler \
-	protobuf-compiler \
-	python-is-python3 \
-	python3-minimal \
-	python3-protobuf \
-	python3-yaml \
-	python3-future
-
 COPY . /criu
 WORKDIR /criu
 
+# On Ubuntu, kernel modules such as ip_tables and xt_mark may not be loaded by default
+# We need to install kmod to enable iptables to load these modules for us.
+RUN contrib/apt-install "$CC" && contrib/dependencies/apt-packages.sh
+
 RUN git clean -dfx && date && \
 # Check single object build
 	make -j $(nproc) CC="$CC" criu/parasite-syscall.o && \
diff --git a/scripts/build/Dockerfile.unstable-cross.tmpl b/scripts/build/Dockerfile.unstable-cross.tmpl
index dacfd96ef..7edb289b6 100644
--- a/scripts/build/Dockerfile.unstable-cross.tmpl
+++ b/scripts/build/Dockerfile.unstable-cross.tmpl
@@ -1,28 +1,7 @@
-COPY scripts/ci/apt-install /bin/apt-install
-
 # Add the cross compiler sources
 RUN echo "deb http://deb.debian.org/debian/ unstable main" >> /etc/apt/sources.list && \
   dpkg --add-architecture ${DEBIAN_ARCH}
 
-RUN apt-install \
-	crossbuild-essential-${DEBIAN_ARCH}	\
-	libc6-dev-${DEBIAN_ARCH}-cross		\
-	libc6-${DEBIAN_ARCH}-cross		\
-	libbz2-dev:${DEBIAN_ARCH}		\
-	libexpat1-dev:${DEBIAN_ARCH}		\
-	ncurses-dev:${DEBIAN_ARCH}		\
-	libssl-dev:${DEBIAN_ARCH}		\
-	protobuf-c-compiler			\
-	protobuf-compiler			\
-	python3-protobuf			\
-	libnl-3-dev:${DEBIAN_ARCH}		\
-	libprotobuf-dev:${DEBIAN_ARCH}		\
-	libnet-dev:${DEBIAN_ARCH}		\
-	libprotobuf-c-dev:${DEBIAN_ARCH}	\
-	libcap-dev:${DEBIAN_ARCH}		\
-	libaio-dev:${DEBIAN_ARCH}		\
-	libnl-route-3-dev:${DEBIAN_ARCH}
-
 ENV CROSS_COMPILE=${CROSS_TRIPLET}-				\
 	CROSS_ROOT=/usr/${CROSS_TRIPLET}			\
 	AS=/usr/bin/${CROSS_TRIPLET}-as				\
@@ -39,4 +18,6 @@ ENV PATH="${PATH}:${CROSS_ROOT}/bin"				\
 COPY . /criu
 WORKDIR /criu
 
-RUN	make mrproper && date && make -j $(nproc) zdtm && date
+RUN contrib/dependencies/apt-cross-packages.sh
+
+RUN make mrproper && date && make -j $(nproc) zdtm && date
diff --git a/scripts/build/Dockerfile.x86_64.hdr b/scripts/build/Dockerfile.x86_64.hdr
index 32fc2978a..a666f6c26 100644
--- a/scripts/build/Dockerfile.x86_64.hdr
+++ b/scripts/build/Dockerfile.x86_64.hdr
@@ -1,5 +1,5 @@
-FROM ubuntu:focal
+FROM ubuntu:24.04
 
-COPY scripts/ci/apt-install /bin/apt-install
+COPY contrib/apt-install /bin/apt-install
 
 RUN apt-install gcc-multilib
diff --git a/scripts/build/Makefile b/scripts/build/Makefile
index 2c006ad87..a420cea94 100644
--- a/scripts/build/Makefile
+++ b/scripts/build/Makefile
@@ -1,5 +1,5 @@
-ARCHES := x86_64 fedora-asan fedora-rawhide centos7 armv7hf centos8
-STABLE_CROSS_ARCHES := armv7-stable-cross aarch64-stable-cross ppc64-stable-cross mips64el-stable-cross
+ARCHES := x86_64 fedora-asan fedora-rawhide armv7hf
+STABLE_CROSS_ARCHES := armv7-stable-cross aarch64-stable-cross ppc64-stable-cross mips64el-stable-cross riscv64-stable-cross
 UNSTABLE_CROSS_ARCHES := armv7-unstable-cross aarch64-unstable-cross ppc64-unstable-cross mips64el-unstable-cross
 NON_CLANG := $(UNSTABLE_CROSS_ARCHES) $(STABLE_CROSS_ARCHES)
 CREATE_DOCKERFILES := $(ARCHES) $(NON_CLANG)
diff --git a/scripts/ci/Makefile b/scripts/ci/Makefile
index 120f561e4..bad8065f2 100644
--- a/scripts/ci/Makefile
+++ b/scripts/ci/Makefile
@@ -11,7 +11,7 @@ ifdef CLANG
 	target-suffix = -clang
 endif
 
-TARGETS := alpine fedora-rawhide centos7 centos8 archlinux
+TARGETS := alpine fedora-rawhide archlinux
 ZDTM_OPTS :=
 UNAME := $(shell uname -m)
 export UNAME
@@ -20,14 +20,6 @@ export CONTAINER_RUNTIME
 
 alpine: ZDTM_OPTS=-x zdtm/static/binfmt_misc -x zdtm/static/sched_policy00
 
-define DOCKER_JSON
-{
-	"storage-driver": "devicemapper"
-}
-endef
-
-export DOCKER_JSON
-
 ifeq ($(GITHUB_ACTIONS),true)
 	# GitHub Actions does not give us a real TTY and errors out with
 	# 'the input device is not a TTY' if using '-t'
@@ -38,43 +30,29 @@ endif
 
 export CONTAINER_TERMINAL
 
+# Here we assume that any CPU architecture besides x86_64 is running in containers
+# that may not support running docker with '--privileged'.
 ifeq ($(UNAME),x86_64)
-	# On anything besides x86_64 Travis is running unprivileged LXD
-	# containers which do not support running docker with '--privileged'.
 	CONTAINER_OPTS := --rm $(CONTAINER_TERMINAL) --privileged --userns=host --cgroupns=host -v /lib/modules:/lib/modules --tmpfs /run
 else
 	CONTAINER_OPTS := --rm -v /lib/modules:/lib/modules --tmpfs /run
 endif
 
 ifeq ($(CONTAINER_RUNTIME),podman)
-	# Just as Docker needs to use devicemapper Podman needs vfs
-	# as graphdriver as overlayfs does not support all test cases
-	STORAGE_DRIVER := vfs
 	# Podman limits the number of processes in a container using cgroups.
 	# Disable it as it breaks the thread-bomb test
 	CONTAINER_OPTS += --pids-limit=0
 endif
 
-export STORAGE_DRIVER
-
-restart-docker:
-	if [ "$$UNAME" = "x86_64" ] && [ "$$CONTAINER_RUNTIME" = "docker" ]; then \
-		echo "$$DOCKER_JSON" > /etc/docker/daemon.json; \
-		cat /etc/docker/daemon.json; \
-		systemctl status docker; \
-		systemctl restart docker; \
-		systemctl status docker; \
-	fi
-
 export ZDTM_OPTS
 
-$(TARGETS): restart-docker
+$(TARGETS):
 	$(MAKE) -C ../build $@$(target-suffix)
-	$(CONTAINER_RUNTIME) run --env-file docker.env $(if $(ZDTM_OPTS),-e ZDTM_OPTS) $(CONTAINER_OPTS) criu-$@ scripts/ci/run-ci-tests.sh
+	$(CONTAINER_RUNTIME) run --env-file docker.env -v `pwd`/../../:/criu $(if $(ZDTM_OPTS),-e ZDTM_OPTS) $(CONTAINER_OPTS) criu-$@ scripts/ci/run-ci-tests.sh
 
-fedora-asan: restart-docker
+fedora-asan:
 	$(MAKE) -C ../build $@$(target-suffix)
-	$(CONTAINER_RUNTIME) run $(CONTAINER_OPTS) criu-$@ ./scripts/ci/asan.sh $(ZDTM_OPTS)
+	$(CONTAINER_RUNTIME) run $(CONTAINER_OPTS) -v `pwd`/../../:/criu criu-$@ ./scripts/ci/asan.sh $(ZDTM_OPTS)
 
 docker-test:
 	./docker-test.sh
@@ -82,11 +60,8 @@ docker-test:
 podman-test:
 	./podman-test.sh
 
-# overlayfs behaves differently on Ubuntu and breaks CRIU
-# https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257
-# Switch to devicemapper
-openj9-test: restart-docker
-	./openj9-test.sh
+java-test:
+	./java-test.sh
 
 setup-vagrant:
 	./vagrant.sh setup
@@ -97,7 +72,23 @@ vagrant-fedora-no-vdso: setup-vagrant
 vagrant-fedora-rawhide: setup-vagrant
 	./vagrant.sh fedora-rawhide
 
-.PHONY: setup-vagrant vagrant-fedora-no-vdso vagrant-fedora-rawhide
+vagrant-fedora-non-root: setup-vagrant
+	./vagrant.sh fedora-non-root
+
+.PHONY: setup-vagrant vagrant-fedora-no-vdso vagrant-fedora-rawhide vagrant-fedora-non-root
+
+check-commit:
+	($(MAKE) -j $$(nproc) -C ../.. && \
+		echo "Commit $$(git rev-parse --short HEAD) built successfully") || \
+		(echo "Build failed for $$(git rev-list -n 1 --pretty HEAD)" && \
+		exit 1)
+
+.PHONY: check-commit
+
+loongarch64-qemu-test:
+	./loongarch64-qemu-test.sh
+
+.PHONY: loongarch64-qemu-test
 
 %:
 	$(MAKE) -C ../build $@$(target-suffix)
diff --git a/scripts/ci/asan.sh b/scripts/ci/asan.sh
index 8113b9b19..8b72fa5f1 100755
--- a/scripts/ci/asan.sh
+++ b/scripts/ci/asan.sh
@@ -1,11 +1,12 @@
 #!/bin/bash
 
-# shellcheck disable=2044
-
 set -x
 
 cat /proc/self/mountinfo
 
+time make ASAN=1 -j 4 V=1
+time make -j4 -C test/zdtm V=1
+
 chmod 0777 test
 chmod 0777 test/zdtm/transition/
 chmod 0777 test/zdtm/static
@@ -13,7 +14,8 @@ chmod 0777 test/zdtm/static
 ./test/zdtm.py run -a --keep-going -k always --parallel 4 -x zdtm/static/rtc "$@"
 ret=$?
 
-for i in $(find / -name 'asan.log*'); do
+shopt -s globstar nullglob
+for i in /**/asan.log*; do
 	echo "$i"
 	echo ========================================
 	cat "$i"
diff --git a/scripts/ci/docker-test.sh b/scripts/ci/docker-test.sh
index f36b4e458..c1c745544 100755
--- a/scripts/ci/docker-test.sh
+++ b/scripts/ci/docker-test.sh
@@ -1,46 +1,35 @@
 #!/bin/bash
 
-# shellcheck disable=SC1091,SC2015
-
 set -x -e -o pipefail
 
-./apt-install \
-    apt-transport-https \
-    ca-certificates \
-    curl \
-    software-properties-common
+# Workaround: Docker 28.x and 29.x have a known regression that breaks checkpoint
+# and restore (C/R), so install the newest other release (chosen below with a
+# version-aware sort). See https://github.com/moby/moby/issues/50750 for details.
+export DEBIAN_FRONTEND=noninteractive
+apt remove -y docker-ce docker-ce-cli
+../../contrib/apt-install -y ca-certificates curl
+install -m 0755 -d /etc/apt/keyrings
+curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
+chmod a+r /etc/apt/keyrings/docker.asc
+# shellcheck disable=SC1091
+echo \
+  "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
+  $(. /etc/os-release && echo "${UBUNTU_CODENAME:-$VERSION_CODENAME}") stable" > /etc/apt/sources.list.d/docker.list
+apt update -y
+apt-cache madison docker-ce | awk '{ print $3 }'
+verstr="$(apt-cache madison docker-ce | awk '{ print $3 }' | sort -V | grep -Ev ':(28|29)\.'| tail -n 1)"
+../../contrib/apt-install -y "docker-ce=$verstr" "docker-ce-cli=$verstr"
 
-curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
-
-add-apt-repository \
-   "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
-   $(lsb_release -cs) \
-   stable test"
-
-./apt-install docker-ce
-
-. /etc/lsb-release
-
-# overlayfs with current Ubuntu kernel breaks CRIU
-# https://bugs.launchpad.net/ubuntu/+source/linux-azure/+bug/1967924
-# Use devicemapper storage drive as a work-around
-echo '{ "experimental": true, "storage-driver": "devicemapper" }' > /etc/docker/daemon.json
+# docker checkpoint and restore is an experimental feature
+echo '{ "experimental": true }' > /etc/docker/daemon.json
+service docker restart
 
 CRIU_LOG='/criu.log'
 mkdir -p /etc/criu
 echo "log-file=$CRIU_LOG" > /etc/criu/runc.conf
 
-service docker stop
-systemctl stop containerd.service
-
-# Always use the latest containerd release.
-# Restore with containerd versions after v1.2.14 and before v1.5.0-beta.0 are broken.
-# https://github.com/checkpoint-restore/criu/issues/1223
-CONTAINERD_DOWNLOAD_URL=$(curl -s https://api.github.com/repos/containerd/containerd/releases/latest | grep '"browser_download_url":.*/containerd-.*-linux-amd64.tar.gz.$' | cut -d\" -f4)
-wget -nv "$CONTAINERD_DOWNLOAD_URL" -O - | tar -xz -C /usr/
-
-systemctl restart containerd.service
-service docker restart
+# Test checkpoint/restore with action script
+echo "action-script /usr/bin/true" | sudo tee /etc/criu/default.conf
 
 export SKIP_CI_TEST=1
 
@@ -88,17 +77,35 @@ checkpoint_container () {
 	docker wait cr
 }
 
-restore_container () {
-	CHECKPOINT_NAME=$1
-
-	docker start --checkpoint "$CHECKPOINT_NAME" cr 2>&1 | tee log || {
+print_logs () {
 	cat "$(grep log 'log file:' | sed 's/log file:\s*//')" || true
 		docker logs cr || true
 		cat $CRIU_LOG || true
 		dmesg
 		docker ps
 		exit 1
-	}
+}
+
+declare -i max_restore_container_tries=3
+
+restore_container () {
+	CHECKPOINT_NAME=$1
+	# Start "cr" from checkpoint $1; retry only on the known race below, otherwise abort via print_logs.
+	for i in $(seq $max_restore_container_tries); do
+		docker start --checkpoint "$CHECKPOINT_NAME" cr 2>&1 | tee log && break
+
+		# FIXME: There is a race condition in docker/containerd that causes
+		# docker to occasionally fail when starting a container from a
+		# checkpoint immediately after the checkpoint has been created.
+		# https://github.com/moby/moby/issues/42900
+		if grep -Eq '^Error response from daemon: failed to upload checkpoint to containerd: commit failed: content sha256:.*: already exists$' log; then
+			echo "Retry container restore: $i/$max_restore_container_tries"
+			sleep 1
+		else
+			print_logs
+		fi
+		[ "$i" -lt "$max_restore_container_tries" ] || print_logs # out of retries: fail instead of returning success
+	done
 }
 
 # Scenario: Create multiple containers and checkpoint and restore them once
diff --git a/scripts/ci/java-test.sh b/scripts/ci/java-test.sh
new file mode 100755
index 000000000..a5b13a107
--- /dev/null
+++ b/scripts/ci/java-test.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+cd ../.. || exit 1
+
+sudo modprobe iptable_filter
+
+failures=""
+
+docker build -t criu-openj9-ubuntu-test:latest -f scripts/build/Dockerfile.openj9-ubuntu .
+if ! docker run --rm --privileged criu-openj9-ubuntu-test:latest; then
+	failures="$failures openj9-ubuntu"
+fi
+
+docker build -t criu-hotspot-alpine-test:latest -f scripts/build/Dockerfile.hotspot-alpine .
+if ! docker run --rm --privileged criu-hotspot-alpine-test:latest; then
+	failures="$failures hotspot-alpine"
+fi
+
+docker build -t criu-hotspot-ubuntu-test:latest -f scripts/build/Dockerfile.hotspot-ubuntu .
+if ! docker run --rm --privileged criu-hotspot-ubuntu-test:latest; then
+	failures="$failures hotspot-ubuntu"
+fi
+
+if [ -n "$failures" ]; then
+	echo "Tests failed on $failures"
+	exit 1
+fi
diff --git a/scripts/ci/loongarch64-qemu-test.sh b/scripts/ci/loongarch64-qemu-test.sh
new file mode 100755
index 000000000..7e00ab65a
--- /dev/null
+++ b/scripts/ci/loongarch64-qemu-test.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+set -o nounset
+set -o errexit
+set -x
+
+../../contrib/apt-install \
+    apt-transport-https \
+    ca-certificates \
+    curl \
+    software-properties-common \
+    sshpass \
+    openssh-client
+
+curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
+
+add-apt-repository \
+   "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
+   $(lsb_release -cs) \
+   stable test"
+
+../../contrib/apt-install docker-ce
+
+# shellcheck source=/dev/null
+. /etc/lsb-release
+
+# docker checkpoint and restore is an experimental feature
+echo '{ "experimental": true }' > /etc/docker/daemon.json
+service docker restart
+
+docker info
+
+# Run a loongarch64 VM
+
+PORT='2222'
+USER='root'
+PASSWORD='loongarch64'
+NAME='vm'
+
+docker run \
+    -d \
+    --net host \
+    --name $NAME \
+    merore/archlinux-loongarch64
+
+run() { # Execute the command string "$1" inside the VM over password-authenticated ssh
+    if [ -z "${1:-}" ]; then # ${1:-} keeps 'set -o nounset' from aborting before the message is printed
+        echo "Command cannot be empty."
+        exit 1
+    fi
+    sshpass -p "$PASSWORD" ssh -o StrictHostKeyChecking=no -p "$PORT" "$USER@127.0.0.1" "$1"
+}
+
+# Wait for the VM to start
+while (! run "uname -a")
+do
+    echo "Wait vm to start..."
+    sleep 1
+done
+echo "The loongarch64 vm is started!"
+
+# Tar criu and send to vm
+tar -cf criu.tar ../../../criu
+sshpass -p $PASSWORD scp -o StrictHostKeyChecking=no -P $PORT criu.tar $USER@127.0.0.1:/root
+
+# build and test
+run 'cd /root; tar -xf criu.tar'
+run 'cd /root/criu; make -j4 && make -j4 -C test/zdtm'
+run "cd /root/criu; ./test/zdtm.py run -t zdtm/static/maps02 -t zdtm/static/maps05 -t zdtm/static/maps06 -t zdtm/static/maps10 -t zdtm/static/maps_file_prot -t zdtm/static/memfd00 -t zdtm/transition/fork -t zdtm/transition/fork2 -t zdtm/transition/shmem -f h"
diff --git a/scripts/ci/openj9-test.sh b/scripts/ci/openj9-test.sh
deleted file mode 100755
index b8c07f180..000000000
--- a/scripts/ci/openj9-test.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-cd ../.. || exit 1
-
-failures=""
-
-docker build -t criu-openj9-ubuntu-test:latest -f scripts/build/Dockerfile.openj9-ubuntu .
-if ! docker run --rm --privileged criu-openj9-ubuntu-test:latest; then
-	failures="$failures ubuntu"
-fi
-
-docker build -t criu-openj9-alpine-test:latest -f scripts/build/Dockerfile.openj9-alpine .
-if ! docker run --rm --privileged criu-openj9-alpine-test:latest; then
-	failures="$failures alpine"
-fi
-
-if [ -n "$failures" ]; then
-	echo "Tests failed on $failures"
-	exit 1
-fi
diff --git a/scripts/ci/podman-test.sh b/scripts/ci/podman-test.sh
index 414004514..185783011 100755
--- a/scripts/ci/podman-test.sh
+++ b/scripts/ci/podman-test.sh
@@ -11,29 +11,23 @@ make install PREFIX=/usr
 
 criu --version
 
-# Install crun build dependencies
-scripts/ci/apt-install libyajl-dev libseccomp-dev libsystemd-dev
+# FIXME: Disable checkpoint/restore of cgroups
+# https://github.com/checkpoint-restore/criu/issues/2091
+mkdir -p /etc/criu
+echo "manage-cgroups ignore" > /etc/criu/runc.conf
+sed -i 's/#runtime\s*=\s*.*/runtime = "runc"/' /usr/share/containers/containers.conf
 
-# Install crun from source to test libcriu integration
-tmp_dir=$(mktemp -d -t ci-XXXXXXXXXX)
-pushd "${tmp_dir}"
-git clone --depth=1 https://github.com/containers/crun
-cd crun
-./autogen.sh && ./configure --prefix=/usr
-make -j"$(nproc)"
-make install
-popd
-rm -rf "${tmp_dir}"
+# Test checkpoint/restore with action script
+echo "action-script /usr/bin/true" | sudo tee /etc/criu/default.conf
 
-# overlayfs with current Ubuntu kernel breaks CRIU
-# https://bugs.launchpad.net/ubuntu/+source/linux-azure/+bug/1967924
-# Use VFS storage drive as a work-around
-export STORAGE_DRIVER=vfs
-podman --storage-driver vfs info
+cat /proc/self/mountinfo
+podman info
 
-# shellcheck disable=SC2016
 podman run --name cr -d docker.io/library/alpine /bin/sh -c 'i=0; while true; do echo $i; i=$(expr $i + 1); sleep 1; done'
 
+# Show criu logs in case of error
+trap 'cat /var/lib/containers/storage/overlay-containers/*/userdata/*.log' EXIT
+
 sleep 1
 for i in $(seq 20); do
 	echo "Test $i for podman container checkpoint"
@@ -74,3 +68,5 @@ for i in $(seq 20); do
 	podman ps -a
 	rm -f /tmp/chkpt.tar.gz
 done
+
+trap 'echo PASS' EXIT
\ No newline at end of file
diff --git a/scripts/ci/prepare-for-fedora-rawhide.sh b/scripts/ci/prepare-for-fedora-rawhide.sh
index f4d3155f9..b0b45fcc3 100755
--- a/scripts/ci/prepare-for-fedora-rawhide.sh
+++ b/scripts/ci/prepare-for-fedora-rawhide.sh
@@ -1,40 +1,21 @@
 #!/bin/bash
 set -e -x
 
+contrib/dependencies/dnf-packages.sh
 dnf install -y \
 	diffutils \
+	e2fsprogs \
 	findutils \
-	gcc \
-	git \
-	gnutls-devel \
+	gawk \
 	gzip \
-	iproute \
-	iptables \
-	nftables \
-	nftables-devel \
-	libaio-devel \
-	libasan \
-	libcap-devel \
-	libnet-devel \
-	libnl3-devel \
-	libbsd-devel \
-	make \
+	kmod \
+	libselinux-utils \
 	procps-ng \
-	protobuf-c-devel \
-	protobuf-devel \
-	python3-flake8 \
-	python3-PyYAML \
-	python3-future \
-	python3-protobuf \
-	python3-junit_xml \
+	python3-pip \
 	python-unversioned-command \
 	redhat-rpm-config \
 	sudo \
-	tar \
-	which \
-	e2fsprogs \
-	rubygem-asciidoctor \
-	kmod
+	tar
 
 # /tmp is no longer 755 in the rawhide container image and breaks CI - fix it
 chmod 1777 /tmp
diff --git a/scripts/ci/riscv64-cross/amd64-sources.list b/scripts/ci/riscv64-cross/amd64-sources.list
new file mode 100644
index 000000000..72dad920c
--- /dev/null
+++ b/scripts/ci/riscv64-cross/amd64-sources.list
@@ -0,0 +1,10 @@
+deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy main restricted
+deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-updates main restricted
+deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy universe
+deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-updates universe
+deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy multiverse
+deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-updates multiverse
+deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-backports main restricted universe multiverse
+deb [arch=amd64] http://security.ubuntu.com/ubuntu/ jammy-security main restricted
+deb [arch=amd64] http://security.ubuntu.com/ubuntu/ jammy-security universe
+deb [arch=amd64] http://security.ubuntu.com/ubuntu/ jammy-security multiverse
\ No newline at end of file
diff --git a/scripts/ci/riscv64-cross/riscv64-sources.list b/scripts/ci/riscv64-cross/riscv64-sources.list
new file mode 100644
index 000000000..67b8067b6
--- /dev/null
+++ b/scripts/ci/riscv64-cross/riscv64-sources.list
@@ -0,0 +1,42 @@
+# See http://help.ubuntu.com/community/UpgradeNotes for how to upgrade to
+# newer versions of the distribution.
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted
+# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted
+
+## Major bug fix updates produced after the final release of the
+## distribution.
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted
+# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted
+
+## N.B. software from this repository is ENTIRELY UNSUPPORTED by the Ubuntu
+## team. Also, please note that software in universe WILL NOT receive any
+## review or updates from the Ubuntu security team.
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy universe
+# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy universe
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-updates universe
+# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-updates universe
+
+## N.B. software from this repository is ENTIRELY UNSUPPORTED by the Ubuntu
+## team, and may not be under a free licence. Please satisfy yourself as to
+## your rights to use the software. Also, please note that software in
+## multiverse WILL NOT receive any review or updates from the Ubuntu
+## security team.
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy multiverse
+# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy multiverse
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-updates multiverse
+# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-updates multiverse
+
+## N.B. software from this repository may not have been tested as
+## extensively as that contained in the main release, although it includes
+## newer versions of some applications which may provide useful features.
+## Also, please note that software in backports WILL NOT receive any review
+## or updates from the Ubuntu security team.
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted universe multiverse
+# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted universe multiverse
+
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted
+# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-security universe
+# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-security universe
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-security multiverse
+# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-security multiverse
\ No newline at end of file
diff --git a/scripts/ci/run-ci-tests.sh b/scripts/ci/run-ci-tests.sh
index 8d9de6e55..05a3b71e8 100755
--- a/scripts/ci/run-ci-tests.sh
+++ b/scripts/ci/run-ci-tests.sh
@@ -1,25 +1,20 @@
 #!/bin/bash
 set -x -e
 
-CI_PKGS="protobuf-c-compiler libprotobuf-c-dev libaio-dev libgnutls28-dev
-		libgnutls30 libprotobuf-dev protobuf-compiler libcap-dev
-		libnl-3-dev gdb bash libnet-dev util-linux asciidoctor
-		libnl-route-3-dev time flake8 libbsd-dev python3-yaml
-		libperl-dev pkg-config python3-future python3-protobuf
-		python3-junit.xml"
+CI_PKGS=()
 
-X86_64_PKGS="gcc-multilib"
+X86_64_PKGS=(gcc-multilib)
+
+# Convert from string to array.
+IFS=" " read -r -a ZDTM_OPTS <<< "$ZDTM_OPTS"
 
 UNAME_M=$(uname -m)
 
 if [ "$UNAME_M" != "x86_64" ]; then
-	# For Travis only x86_64 seems to be baremetal. Other
-	# architectures are running in unprivileged LXD containers.
-	# That seems to block most of CRIU's interfaces.
-
-	# But with the introduction of baremetal aarch64 systems in
-	# Travis (arch: arm64-graviton2) we can override this using
-	# an environment variable
+	# Some tests rely on kernel features that may not be available
+	# when running in a container. Here we assume that x86_64 systems
+	# are baremetal, and skip the tests for all other CPU architectures.
+	# The RUN_TESTS environment variable can override this, e.g., for aarch64.
 	[ -n "$RUN_TESTS" ] || SKIP_CI_TEST=1
 fi
 
@@ -33,9 +28,13 @@ ci_prep () {
 	# not run anymore with 'sudo -u \#1000' if the UID does not exist.
 	adduser -u 1000 --disabled-password --gecos "criutest" criutest || :
 
-	# This can fail on aarch64 travis
+	# This can fail on aarch64
 	service apport stop || :
 
+	# Ubuntu has set up AppArmor in 24.04 so that it blocks use of user
+	# namespaces by unprivileged users. We need this for some of our tests.
+	sysctl kernel.apparmor_restrict_unprivileged_userns=0 || :
+
 	if [ "$CLANG" = "1" ]; then
 		# clang support
 		CC=clang
@@ -46,19 +45,16 @@ ci_prep () {
 	else
 		CC=gcc
 	fi
-	CI_PKGS="$CI_PKGS $CC"
+	CI_PKGS+=("$CC")
 
 	# Do not install x86_64 specific packages on other architectures
 	if [ "$UNAME_M" = "x86_64" ]; then
-		CI_PKGS="$CI_PKGS $X86_64_PKGS"
+		CI_PKGS+=("${X86_64_PKGS[@]}")
 	fi
 
-	scripts/ci/apt-install "$CI_PKGS"
+	contrib/dependencies/apt-packages.sh
+	contrib/apt-install "${CI_PKGS[@]}"
 	chmod a+x "$HOME"
-
-	# zdtm uses an unversioned python binary to run the tests.
-	# let's point python to python3
-	ln -sf /usr/bin/python3 /usr/bin/python
 }
 
 test_stream() {
@@ -69,9 +65,8 @@ test_stream() {
 	# restorer and eventually close the page read. However, image-streamer expects the
 	# whole image to be read and the image is not reopened, sent twice. These MAP_HUGETLB
 	# test cases will result in EPIPE error at the moment.
-	STREAM_TEST_EXCLUDE="-x maps09 -x maps10"
-	# shellcheck disable=SC2086
-	./test/zdtm.py run --stream -p 2 --keep-going -a $STREAM_TEST_EXCLUDE $ZDTM_OPTS
+	STREAM_TEST_EXCLUDE=(-x maps09 -x maps10)
+	./test/zdtm.py run --stream -p 2 --keep-going -a "${STREAM_TEST_EXCLUDE[@]}" "${ZDTM_OPTS[@]}"
 }
 
 print_header() {
@@ -123,8 +118,14 @@ if [ "${CD_TO_TOP}" = "1" ]; then
 fi
 
 export GCOV CC
+if [ -z "$COMPILE_FLAGS" ]; then
+	LOCAL_COMPILE_FLAGS=("V=1")
+else
+	IFS=" " read -r -a LOCAL_COMPILE_FLAGS <<< "$COMPILE_FLAGS"
+	LOCAL_COMPILE_FLAGS=("V=1" "${LOCAL_COMPILE_FLAGS[@]}")
+fi
 $CC --version
-time make CC="$CC" -j4 V=1
+time make CC="$CC" -j4 "${LOCAL_COMPILE_FLAGS[@]}"
 
 ./criu/criu -v4 cpuinfo dump || :
 ./criu/criu -v4 cpuinfo check || :
@@ -142,11 +143,17 @@ time make unittest
 
 [ -n "$SKIP_CI_TEST" ] && exit 0
 
+# Umount cpuset in cgroupv1 to make it move to cgroupv2
+if [ -d /sys/fs/cgroup/cpuset ]; then
+	umount /sys/fs/cgroup/cpuset
+fi
+
 ulimit -c unlimited
 
 cgid=$$
 cleanup_cgroup() {
 	./test/zdtm_umount_cgroups $cgid
+	dmesg
 }
 trap cleanup_cgroup EXIT
 ./test/zdtm_mount_cgroups $cgid
@@ -160,21 +167,20 @@ if [ "${COMPAT_TEST}x" = "yx" ] ; then
 	# for 32-bit tests. A better way would involve launching docker..
 	# But it would require making zdtm.py aware of docker and launching
 	# tests inside the CT.
-	INCOMPATIBLE_LIBS="libaio-dev libcap-dev libnl-3-dev libnl-route-3-dev"
-	IA32_PKGS=""
+	INCOMPATIBLE_LIBS=(libaio-dev libcap-dev libnl-3-dev libnl-route-3-dev)
+	IA32_PKGS=()
 	REFUGE=64-refuge
 
 	mkdir "$REFUGE"
-	for i in $INCOMPATIBLE_LIBS ; do
+	for i in "${INCOMPATIBLE_LIBS[@]}" ; do
 		for j in $(dpkg --listfiles "$i" | grep '\.so$') ; do
 			cp "$j" "$REFUGE/"
 		done
-		IA32_PKGS="$IA32_PKGS $i:i386"
+		IA32_PKGS+=("$i:i386")
 	done
-	# shellcheck disable=SC2086
-	apt-get remove $INCOMPATIBLE_LIBS
+	apt-get remove "${INCOMPATIBLE_LIBS[@]}"
 	dpkg --add-architecture i386
-	scripts/ci/apt-install "$IA32_PKGS"
+	contrib/apt-install "${IA32_PKGS[@]}"
 	mkdir -p /usr/lib/x86_64-linux-gnu/
 	mv "$REFUGE"/* /usr/lib/x86_64-linux-gnu/
 fi
@@ -211,15 +217,12 @@ if [ "${STREAM_TEST}" = "1" ]; then
 	exit 0
 fi
 
-# shellcheck disable=SC2086
-./test/zdtm.py run -a -p 2 --keep-going $ZDTM_OPTS
+./test/zdtm.py run -a -p 2 --keep-going "${ZDTM_OPTS[@]}"
 if criu/criu check --feature move_mount_set_group; then
-	# shellcheck disable=SC2086
-	./test/zdtm.py run -a -p 2 --mntns-compat-mode --keep-going $ZDTM_OPTS
+	./test/zdtm.py run -a -p 2 --mntns-compat-mode --keep-going "${ZDTM_OPTS[@]}"
 fi
 
-# shellcheck disable=SC2086
-./test/zdtm.py run -a -p 2 --keep-going --criu-config $ZDTM_OPTS
+./test/zdtm.py run -a -p 2 --keep-going --criu-config "${ZDTM_OPTS[@]}"
 
 # Newer kernels are blocking access to userfaultfd:
 # uffd: Set unprivileged_userfaultfd sysctl knob to 1 if kernel faults must be handled without obtaining CAP_SYS_PTRACE capability
@@ -227,17 +230,14 @@ if [ -e /proc/sys/vm/unprivileged_userfaultfd ]; then
 	echo 1 > /proc/sys/vm/unprivileged_userfaultfd
 fi
 
-LAZY_EXCLUDE="-x maps04 -x cmdlinenv00 -x maps007"
+LAZY_EXCLUDE=(-x maps04 -x cmdlinenv00 -x maps007)
 
 LAZY_TESTS='.*(maps0|uffd-events|lazy-thp|futex|fork).*'
-LAZY_OPTS="-p 2 -T $LAZY_TESTS $LAZY_EXCLUDE $ZDTM_OPTS"
+LAZY_OPTS=(-p 2 -T "$LAZY_TESTS" "${LAZY_EXCLUDE[@]}" "${ZDTM_OPTS[@]}")
 
-# shellcheck disable=SC2086
-./test/zdtm.py run $LAZY_OPTS --lazy-pages
-# shellcheck disable=SC2086
-./test/zdtm.py run $LAZY_OPTS --remote-lazy-pages
-# shellcheck disable=SC2086
-./test/zdtm.py run $LAZY_OPTS --remote-lazy-pages --tls
+./test/zdtm.py run "${LAZY_OPTS[@]}" --lazy-pages
+./test/zdtm.py run "${LAZY_OPTS[@]}" --remote-lazy-pages
+./test/zdtm.py run "${LAZY_OPTS[@]}" --remote-lazy-pages --tls
 
 bash -x ./test/jenkins/criu-fault.sh
 if [ "$UNAME_M" == "x86_64" ]; then
@@ -255,15 +255,20 @@ if [ -z "$SKIP_EXT_DEV_TEST" ]; then
 fi
 
 make -C test/others/make/ run CC="$CC"
-if [ -n "$TRAVIS" ] || [ -n "$CIRCLECI" ]; then
+if [ -n "$CIRCLECI" ]; then
        # GitHub Actions (and Cirrus CI) does not provide a real TTY and CRIU will fail with:
        # Error (criu/tty.c:1014): tty: Don't have tty to inherit session from, aborting
        make -C test/others/shell-job/ run
 fi
+make -C test/others/criu-ns/ run
+make -C test/others/skip-file-rwx-check/ run
 make -C test/others/rpc/ run
 
 ./test/zdtm.py run -t zdtm/static/env00 --sibling
 
+./test/zdtm.py run -t zdtm/static/maps00 --preload-libfault
+./test/zdtm.py run -t zdtm/static/maps02 --preload-libfault
+
 ./test/zdtm.py run -t zdtm/transition/maps007 --pre 2 --dedup
 ./test/zdtm.py run -t zdtm/transition/maps007 --pre 2 --noauto-dedup
 ./test/zdtm.py run -t zdtm/transition/maps007 --pre 2 --page-server
@@ -288,6 +293,45 @@ ip net add test
 ./test/zdtm.py run -t zdtm/static/env00 -t zdtm/transition/fork -t zdtm/static/ghost_holes00 -t zdtm/static/socket-tcp -t zdtm/static/msgque -k always
 ./test/crit-recode.py
 
+# Rootless tests
+# Check if cap_checkpoint_restore is supported and also if unshare -c is supported.
+#
+# Do not run this test in a container (see https://github.com/checkpoint-restore/criu/issues/2312).
+# Before v6.8-rc1~215^2~6, the kernel did not show correct device and
+# inode numbers in /proc/pid/maps for stackable file systems.
+skip=0
+findmnt -no FSTYPE / | grep overlay && {
+	./criu/criu check --feature overlayfs_maps || skip=1
+}
+unshare -c /bin/true || skip=1
+capsh --supports=cap_checkpoint_restore || skip=1
+
+if [ "$skip" == 0 ]; then
+	make -C test/zdtm/ cleanout
+	rm -rf test/dump
+	setcap cap_checkpoint_restore,cap_sys_ptrace+eip criu/criu
+	if [ -d /sys/fs/selinux ] && command -v getenforce &>/dev/null; then
+		# Note: selinux in Enforcing mode prevents us from calling clone3() or writing to ns_last_pid on restore; hence set to Permissive for the test and then set back.
+		selinuxmode=$(getenforce)
+		if [ "$selinuxmode" != "Disabled" ]; then
+			setenforce Permissive
+		fi
+
+	fi
+	# Run it as non-root in a user namespace. Since CAP_CHECKPOINT_RESTORE behaves differently in non-user namespaces (e.g. no access to map_files) this tests that we can dump and restore
+	# under those conditions. Note that the "... && true" part is necessary; we need at least one statement after the tests so that bash can reap zombies in the user namespace,
+	# otherwise it will exec the last statement and get replaced and nobody will be left to reap our zombies.
+	sudo --user=#65534 --group=#65534 unshare -Ucfpm --mount-proc -- bash -c "./test/zdtm.py run -t zdtm/static/maps00 -f h --rootless && true"
+	if [ -d /sys/fs/selinux ] && command -v getenforce &>/dev/null; then
+		if [ "$selinuxmode" != "Disabled" ]; then
+			setenforce "$selinuxmode"
+		fi
+	fi
+	setcap -r criu/criu
+else
+	echo "Skipping unprivileged mode tests"
+fi
+
 # more crit testing
 make -C test/others/crit run
 
@@ -303,6 +347,9 @@ make -C test/others/ns_ext run
 # config file parser and parameter testing
 make -C test/others/config-file run
 
+# action script testing
+make -C test/others/action-script run
+
 # Skip all further tests when running with GCOV=1
 # The one test which currently cannot handle GCOV testing is compel/test
 # Probably because the GCOV Makefile infrastructure does not exist in compel
@@ -310,3 +357,15 @@ make -C test/others/config-file run
 
 # compel testing
 make -C compel/test
+
+# amdgpu and cuda plugin testing
+make amdgpu_plugin
+make -C plugins/amdgpu/ test_topology_remap
+./plugins/amdgpu/test_topology_remap
+
+./test/zdtm.py run -t zdtm/static/maps00 -t zdtm/static/maps02 --criu-plugin cuda
+./test/zdtm.py run -t zdtm/static/maps00 -t zdtm/static/maps02 --criu-plugin amdgpu
+./test/zdtm.py run -t zdtm/static/maps00 -t zdtm/static/maps02 --criu-plugin amdgpu cuda
+./test/zdtm.py run -t zdtm/static/busyloop00 --criu-plugin inventory_test_enabled inventory_test_disabled
+
+./test/zdtm.py run -t zdtm/static/sigpending -t zdtm/static/pthread00 --mocked-cuda-checkpoint --fault 138
diff --git a/scripts/ci/vagrant.sh b/scripts/ci/vagrant.sh
index af0f7335a..5f2de32b8 100755
--- a/scripts/ci/vagrant.sh
+++ b/scripts/ci/vagrant.sh
@@ -6,42 +6,42 @@
 set -e
 set -x
 
-VAGRANT_VERSION=2.2.19
-FEDORA_VERSION=35
-FEDORA_BOX_VERSION=35.20211026.0
+VAGRANT_VERSION=2.4.7
+FEDORA_VERSION=42
+FEDORA_BOX_VERSION=1.1.0
 
 setup() {
-	if [ -n "$TRAVIS" ]; then
-		# Load the kvm modules for vagrant to use qemu
-		modprobe kvm kvm_intel
-	fi
-
 	# Tar up the git checkout to have vagrant rsync it to the VM
-	tar cf criu.tar ../../../criu
+	tar cf /tmp/criu.tar -C ../../../ criu
 	# Cirrus has problems with the following certificate.
-	wget --no-check-certificate https://releases.hashicorp.com/vagrant/${VAGRANT_VERSION}/vagrant_${VAGRANT_VERSION}_"$(uname -m)".deb -O /tmp/vagrant.deb && \
+	wget --no-check-certificate https://releases.hashicorp.com/vagrant/${VAGRANT_VERSION}/vagrant_${VAGRANT_VERSION}-1_"$(dpkg --print-architecture)".deb -O /tmp/vagrant.deb && \
 		dpkg -i /tmp/vagrant.deb
 
-	./apt-install libvirt-clients libvirt-daemon-system libvirt-dev qemu-utils qemu \
-		ruby build-essential libxml2-dev qemu-kvm rsync ebtables dnsmasq-base \
-		openssh-client
+	../../contrib/apt-install libvirt-clients libvirt-daemon-system libvirt-dev qemu-utils qemu-system \
+		ruby build-essential libxml2-dev qemu-kvm rsync ebtables dnsmasq-base openssh-client
 	systemctl restart libvirtd
 	vagrant plugin install vagrant-libvirt
-	vagrant init fedora/${FEDORA_VERSION}-cloud-base --box-version ${FEDORA_BOX_VERSION}
+	vagrant init cloud-image/fedora-${FEDORA_VERSION} --box-version ${FEDORA_BOX_VERSION}
+
 	# The default libvirt Vagrant VM uses 512MB.
-	# Travis VMs should have around 7.5GB.
+	# VMs in our CI typically have around 16GB.
 	# Increasing it to 4GB should work.
 	sed -i Vagrantfile -e 's,^end$,  config.vm.provider :libvirt do |libvirt|'"\n"'    libvirt.memory = 4096;end'"\n"'end,g'
+	# Sync /tmp/criu.tar into the VM
+	# We want to use $HOME without expansion
+	# shellcheck disable=SC2016
+	sed -i Vagrantfile -e 's|^end$|  config.vm.provision "file", source: "/tmp/criu.tar", destination: "$HOME/criu.tar"'"\n"'end|g'
+
 	vagrant up --provider=libvirt --no-tty
 	mkdir -p /root/.ssh
 	vagrant ssh-config >> /root/.ssh/config
-	ssh default sudo dnf upgrade -y
-	ssh default sudo dnf install -y gcc git gnutls-devel nftables-devel libaio-devel \
-		libasan libcap-devel libnet-devel libnl3-devel libbsd-devel make protobuf-c-devel \
-		protobuf-devel python3-flake8 python3-future python3-protobuf \
-		python3-junit_xml rubygem-asciidoctor iptables libselinux-devel libbpf-devel
+
 	# Disable sssd to avoid zdtm test failures in pty04 due to sssd socket
 	ssh default sudo systemctl mask sssd
+
+	ssh default 'sudo mkdir -p --mode=777 /vagrant && mv $HOME/criu.tar /vagrant && cd /vagrant && tar xf criu.tar'
+	ssh default sudo dnf upgrade -y
+	ssh default sudo /vagrant/criu/contrib/dependencies/dnf-packages.sh
 	ssh default cat /proc/cmdline
 }
 
@@ -49,7 +49,7 @@ fedora-no-vdso() {
 	ssh default sudo grubby --update-kernel ALL --args="vdso=0"
 	vagrant reload
 	ssh default cat /proc/cmdline
-	ssh default 'cd /vagrant; tar xf criu.tar; cd criu; make -j 4'
+	ssh default 'cd /vagrant/criu; make -j'
 	ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -a --keep-going'
 	# This test (pidfd_store_sk) requires pidfd_getfd syscall which is guaranteed in Fedora 33.
 	# It is also skipped from -a because it runs in RPC mode only
@@ -57,6 +57,15 @@ fedora-no-vdso() {
 }
 
 fedora-rawhide() {
+	# Upgrade the kernel to the latest vanilla one
+	ssh default sudo dnf -y copr enable @kernel-vanilla/stable
+	ssh default sudo dnf upgrade -y
+
+	# The 6.2 kernel of Fedora 38 in combination with rawhide userspace breaks
+	# zdtm/static/socket-tcp-nfconntrack. To activate the new kernel previously
+	# installed this reboots the VM.
+	vagrant reload
+	ssh default uname -a
 	#
 	# Workaround the problem:
 	# error running container: error from /usr/bin/crun creating container for [...]: sd-bus call: Transport endpoint is not connected
@@ -65,7 +74,28 @@ fedora-rawhide() {
 	#
 	ssh default 'sudo dnf remove -y crun || true'
 	ssh default sudo dnf install -y podman runc
-	ssh default 'cd /vagrant; tar xf criu.tar; cd criu; sudo -E make -C scripts/ci fedora-rawhide CONTAINER_RUNTIME=podman BUILD_OPTIONS="--security-opt seccomp=unconfined"'
+	# Some tests in the container need selinux to be disabled.
+	# In the container it is not possible to change the state of selinux.
+	# Let's just disable it for this test run completely.
+	ssh default 'sudo setenforce Permissive'
+	ssh default 'cd /vagrant/criu; sudo -E make -C scripts/ci fedora-rawhide CONTAINER_RUNTIME=podman BUILD_OPTIONS="--security-opt seccomp=unconfined"'
+}
+
+fedora-non-root() {
+	ssh default uname -a
+	ssh default 'cd /vagrant/criu; make -j'
+	# Setting the capability should be the only line needed to run as non-root on Fedora
+	# In other environments either set /proc/sys/kernel/yama/ptrace_scope to 0 or grant cap_sys_ptrace to criu
+	ssh default 'sudo setcap cap_checkpoint_restore+eip /vagrant/criu/criu/criu'
+	# Run it once as non-root
+	ssh default 'cd /vagrant/criu; criu/criu check --unprivileged; ./test/zdtm.py run -t zdtm/static/env00 -t zdtm/static/pthread00 -f h --rootless'
+	# Run it as root with '--rootless'
+	ssh default 'cd /vagrant/criu; sudo ./test/zdtm.py run -t zdtm/static/env00 -t zdtm/static/pthread00 -f h; sudo chmod 777 test/dump/zdtm/static/{env00,pthread00}; sudo ./test/zdtm.py run -t zdtm/static/env00 -t zdtm/static/pthread00 -f h --rootless'
+	# Run it as non-root in a user namespace. Since CAP_CHECKPOINT_RESTORE behaves differently in non-user namespaces (e.g. no access to map_files) this tests that we can dump and restore
+	# under those conditions. Note that the "... && true" part is necessary; we need at least one statement after the tests so that bash can reap zombies in the user namespace,
+	# otherwise it will exec the last statement and get replaced and nobody will be left to reap our zombies.
+	# Note: selinux in Enforcing mode prevents us from calling clone3() or writing to ns_last_pid on restore; hence set to Permissive for the test and then set back.
+	ssh default 'cd /vagrant/criu; selinuxmode=`getenforce` && sudo setenforce Permissive && unshare -Ucfpm --mount-proc bash -c "./test/zdtm.py run -t zdtm/static/maps00 -f h --rootless && true" && sudo setenforce $selinuxmode'
 }
 
 $1
diff --git a/scripts/crit-setup.py b/scripts/crit-setup.py
deleted file mode 100644
index 13df03e3b..000000000
--- a/scripts/crit-setup.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import os
-from distutils.core import setup
-
-criu_version = "0.0.1"
-env = os.environ
-
-if 'CRIU_VERSION_MAJOR' in env and 'CRIU_VERSION_MINOR' in env:
-    criu_version = '{}.{}'.format(
-        env['CRIU_VERSION_MAJOR'],
-        env['CRIU_VERSION_MINOR']
-    )
-
-    if 'CRIU_VERSION_SUBLEVEL' in env and env['CRIU_VERSION_SUBLEVEL']:
-        criu_version += '.' + env['CRIU_VERSION_SUBLEVEL']
-
-setup(name="crit",
-      version=criu_version,
-      description="CRiu Image Tool",
-      author="CRIU team",
-      author_email="criu@openvz.org",
-      license="GPLv2",
-      url="https://github.com/checkpoint-restore/criu",
-      package_dir={'pycriu': 'lib/py'},
-      packages=["pycriu", "pycriu.images"],
-      scripts=["crit/crit"])
diff --git a/scripts/criu-ns b/scripts/criu-ns
index 9fc58b640..5950d7c50 100755
--- a/scripts/criu-ns
+++ b/scripts/criu-ns
@@ -4,6 +4,9 @@ import ctypes.util
 import errno
 import sys
 import os
+import fcntl
+import termios
+import time
 
 # <sched.h> constants for unshare
 CLONE_NEWNS = 0x00020000
@@ -68,7 +71,19 @@ def _wait_for_process_status(criu_pid):
         try:
             (pid, status) = os.wait()
             if pid == criu_pid:
-                return os.waitstatus_to_exitcode(status)
+                # The following code block is based on
+                # os.waitstatus_to_exitcode() introduced in Python 3.9
+                # and we implement this for compatibility with older
+                # versions of Python.
+                if os.WIFSIGNALED(status):
+                    return os.WTERMSIG(status)
+                elif os.WIFEXITED(status):
+                    return os.WEXITSTATUS(status)
+                elif os.WIFSTOPPED(status):
+                    return os.WSTOPSIG(status)
+                else:
+                    raise Exception("CRIU was terminated by an "
+                                    "unidentified reason")
         except OSError:
             return -251
 
@@ -78,8 +93,21 @@ def run_criu(args):
     Spawn CRIU binary
     """
     print(sys.argv)
-    os.execlp('criu', *['criu'] + args)
-    raise OSError(errno.ENOENT, "No such command")
+
+    if "--criu-binary" in args:
+        try:
+            opt_index = args.index("--criu-binary")
+            path = args[opt_index + 1]
+            del args[opt_index:opt_index + 2]
+            args.insert(0, "criu")
+            os.execv(path, args)
+            raise OSError(errno.ENOENT, "No such command")
+        except (ValueError, IndexError, FileNotFoundError):
+            raise OSError(errno.ENOENT, "--criu-binary missing argument")
+    else:
+        args.insert(0, "criu")
+        os.execvp("criu", args)
+        raise OSError(errno.ENOENT, "No such command")
 
 
 # pidns_holder creates a process that is reparented to the init.
@@ -108,8 +136,8 @@ def wrap_restore():
     if '--restore-sibling' in restore_args:
         raise OSError(errno.EINVAL, "--restore-sibling is not supported")
 
-    # Unshare pid and mount namespaces
-    if _unshare(CLONE_NEWNS | CLONE_NEWPID) != 0:
+    # Unshare pid namespace
+    if _unshare(CLONE_NEWPID) != 0:
         _errno = ctypes.get_errno()
         raise OSError(_errno, errno.errorcode[_errno])
 
@@ -121,12 +149,65 @@ def wrap_restore():
         restore_detached = True
         restore_args.remove('--restore-detached')
 
+    restore_pidfile = None
+    if '--pidfile' in restore_args:
+        try:
+            opt_index = restore_args.index('--pidfile')
+            restore_pidfile = restore_args[opt_index + 1]
+            del restore_args[opt_index:opt_index + 2]
+        except (ValueError, IndexError, FileNotFoundError):
+            raise OSError(errno.ENOENT, "--pidfile missing argument")
+
+        if not restore_pidfile.startswith('/'):
+            for base_dir_opt in ['--work-dir', '-W', '--images-dir', '-D']:
+                if base_dir_opt in restore_args:
+                    try:
+                        opt_index = restore_args.index(base_dir_opt)
+                        restore_pidfile = os.path.join(restore_args[opt_index + 1], restore_pidfile)
+                        break
+                    except (ValueError, IndexError, FileNotFoundError):
+                        raise OSError(errno.ENOENT, base_dir_opt + " missing argument")
+
     criu_pid = os.fork()
     if criu_pid == 0:
+        # Unshare mount namespace
+        if _unshare(CLONE_NEWNS) != 0:
+            _errno = ctypes.get_errno()
+            raise OSError(_errno, errno.errorcode[_errno])
+
         os.setsid()
+        # Set stdin tty to be a controlling tty of our new session, this is
+        # required by --shell-job option, as for it CRIU would try to set a
+        # process group of restored root task to be a foreground group on the
+        # terminal.
+        if '--shell-job' in restore_args or '-j' in restore_args:
+            if os.isatty(sys.stdin.fileno()):
+                fcntl.ioctl(sys.stdin.fileno(), termios.TIOCSCTTY, 1)
+            else:
+                raise OSError(errno.EINVAL, 'The stdin is not a tty for a --shell-job')
+
         _mount_new_proc()
         run_criu(restore_args)
 
+    if restore_pidfile:
+        restored_pid = None
+        retry = 5
+
+        while not restored_pid and retry:
+            with open('/proc/%d/task/%d/children' % (criu_pid, criu_pid)) as f:
+                line = f.readline().strip()
+                if len(line):
+                    restored_pid = line
+                    break
+            retry -= 1
+            time.sleep(1)
+
+        if restored_pid:
+            with open(restore_pidfile, 'w+') as f:
+                f.write(restored_pid)
+        else:
+            print("Warn: Search of restored pid for --pidfile option timeouted")
+
     if restore_detached:
         return 0
 
@@ -135,7 +216,7 @@ def wrap_restore():
 
 def get_varg(args):
     for i in range(1, len(sys.argv)):
-        if not sys.argv[i] in args:
+        if sys.argv[i] not in args:
             continue
 
         if i + 1 >= len(sys.argv):
@@ -153,9 +234,9 @@ def _set_namespace(fd):
         raise OSError(_errno, errno.errorcode[_errno])
 
 
-def is_my_namespace(fd):
+def is_my_namespace(fd, ns):
     """Returns True if fd refers to current namespace"""
-    return os.stat('/proc/self/ns/pid').st_ino != os.fstat(fd).st_ino
+    return os.stat('/proc/self/ns/%s' % ns).st_ino == os.fstat(fd).st_ino
 
 
 def set_pidns(tpid, pid_idx):
@@ -165,7 +246,7 @@ def set_pidns(tpid, pid_idx):
     pid namespace.
     """
     ns_fd = os.open('/proc/%s/ns/pid' % tpid, os.O_RDONLY)
-    if is_my_namespace(ns_fd):
+    if not is_my_namespace(ns_fd, "pid"):
         for line in open('/proc/%s/status' % tpid):
             if not line.startswith('NSpid:'):
                 continue
@@ -190,7 +271,7 @@ def set_mntns(tpid):
     will be the same in target mntns.
     """
     ns_fd = os.open('/proc/%s/ns/mnt' % tpid, os.O_RDONLY)
-    if is_my_namespace(ns_fd):
+    if not is_my_namespace(ns_fd, "mnt"):
         root_st = os.stat('/')
         cwd_st = os.stat('.')
         cwd_path = os.path.realpath('.')
diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak
index 592552cb8..727e9689e 100644
--- a/scripts/feature-tests.mak
+++ b/scripts/feature-tests.mak
@@ -35,34 +35,6 @@ int main(void)
 }
 endef
 
-define FEATURE_TEST_STRLCPY
-
-#include <string.h>
-
-#ifdef CONFIG_HAS_LIBBSD
-# include <bsd/string.h>
-#endif
-
-int main(void)
-{
-	return strlcpy(NULL, NULL, 0);
-}
-endef
-
-define FEATURE_TEST_STRLCAT
-
-#include <string.h>
-
-#ifdef CONFIG_HAS_LIBBSD
-# include <bsd/string.h>
-#endif
-
-int main(void)
-{
-	return strlcat(NULL, NULL, 0);
-}
-endef
-
 define FEATURE_TEST_PTRACE_PEEKSIGINFO
 
 #include <sys/ptrace.h>
@@ -137,19 +109,6 @@ ENTRY(main)
 END(main)
 endef
 
-define FEATURE_TEST_FSCONFIG
-
-#include <linux/mount.h>
-
-int main(void)
-{
-	if (FSCONFIG_CMD_CREATE > 0)
-		return 0;
-	return 0;
-}
-
-endef
-
 define FEATURE_TEST_NFTABLES_LIB_API_0
 
 #include <string.h>
@@ -196,3 +155,22 @@ int main(void)
 	return 0;
 }
 endef
+
+define FEATURE_TEST_NO_LIBC_RSEQ_DEFS
+
+#ifdef __has_include
+#if __has_include(\"sys/rseq.h\")
+#include <sys/rseq.h>
+#endif
+#endif
+
+enum rseq_cpu_id_state {
+	RSEQ_CPU_ID_UNINITIALIZED = -1,
+	RSEQ_CPU_ID_REGISTRATION_FAILED = -2,
+};
+
+int main(void)
+{
+	return 0;
+}
+endef
diff --git a/scripts/fetch-clang-format.sh b/scripts/fetch-clang-format.sh
index 0e9545f2d..5b6037d61 100755
--- a/scripts/fetch-clang-format.sh
+++ b/scripts/fetch-clang-format.sh
@@ -8,9 +8,11 @@ URL="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/.c
 curl -s "${URL}" | sed -e "
 	s,^\( *\)#\([A-Z]\),\1\2,g;
 	s,ControlStatements,ControlStatementsExceptForEachMacros,g;
-	s,ColumnLimit: 80,ColumnLimit: 120,g;
+	s,ColumnLimit: 80,ColumnLimit: 0,g;
 	s,Intended for clang-format >= 4,Intended for clang-format >= 11,g;
+	s,ForEachMacros:,ForEachMacros:\n  - 'for_each_bit',g;
 	s,ForEachMacros:,ForEachMacros:\n  - 'for_each_pstree_item',g;
 	s,\(AlignTrailingComments:.*\)$,\1\nAlignConsecutiveMacros: true,g;
 	s,AlignTrailingComments: false,AlignTrailingComments: true,g;
+	s,\(IndentCaseLabels: false\),\1\nIndentGotoLabels: false,g;
 "  > .clang-format
diff --git a/scripts/github-indent-warnings.py b/scripts/github-indent-warnings.py
new file mode 100755
index 000000000..04f82d6c1
--- /dev/null
+++ b/scripts/github-indent-warnings.py
@@ -0,0 +1,48 @@
+#!/usr/bin/python3
+"""
+Turn a unified diff into GitHub Actions warning annotations.
+
+The diff is read from the file given as the only argument, or from
+stdin when no argument is given. One '::warning' workflow command is
+printed for every hunk header that follows a file header.
+"""
+import sys
+import re
+
+# Captures the 'a/' side path of a 'diff --git a/<path> b/<path>' line.
+re_file = r'^diff --git a/(\S\S*)\s.*$'
+# Captures the first number (old-file start line) of a '@@ -N,... @@' line.
+re_line = r'^@@ -(\d\d*)\D.*@@.*$'
+
+if __name__ == '__main__':
+    if len(sys.argv) != 1 and len(sys.argv) != 2:
+        print(f'usage: {sys.argv[0]} <path/to/file>')
+        print(f'usage: <command> | {sys.argv[0]}')
+        # Use sys.exit(): the bare exit() helper is injected by the site
+        # module and is missing when Python runs with -S.
+        sys.exit(1)
+
+    # open() accepts either a path or an integer file descriptor.
+    input_file = sys.stdin.fileno()
+    if len(sys.argv) == 2:
+        input_file = sys.argv[1]
+
+    with open(input_file, 'r') as fi:
+        file_name = None
+        line_number = None
+        for line in fi:
+            file_matches = re.findall(re_file, line)
+            if len(file_matches) == 1:
+                file_name = file_matches[0]
+                continue
+
+            # Ignore everything before the first file header.
+            if file_name is None:
+                continue
+
+            line_matches = re.findall(re_line, line)
+            if len(line_matches) == 1:
+                # NOTE(review): the +3 presumably skips the default three
+                # lines of leading hunk context - confirm.
+                line_number = int(line_matches[0]) + 3
+                print(f'::warning file={file_name},line={line_number}::clang-format: Possible coding style problem (https://github.com/checkpoint-restore/criu/blob/criu-dev/CONTRIBUTING.md#automatic-tools-to-fix-coding-style)')
diff --git a/scripts/install-debian-pkgs.sh b/scripts/install-debian-pkgs.sh
deleted file mode 100755
index 540c2c094..000000000
--- a/scripts/install-debian-pkgs.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-# Install required packages for development environment in Debian Distro
-
-REQ_PKGS=${REQ_PKGS:=contrib/debian/dev-packages.lst}
-
-help_msg="Install required packages for development environment in Debian Distro
-Usage:
-	scripts/install-debian-pkgs.sh"
-
-function print_help()
-{
-	exec echo -e "$help_msg"
-}
-
-function process()
-{
-	sudo apt-get update
-	sudo apt-get install -yq "$( sed 's/\#.*$//' ${REQ_PKGS} )"
-}
-
-if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
-	print_help
-else
-	process
-fi
diff --git a/scripts/magic-gen.py b/scripts/magic-gen.py
index 3b1f29fb5..38dff1424 100755
--- a/scripts/magic-gen.py
+++ b/scripts/magic-gen.py
@@ -1,4 +1,4 @@
-#!/bin/env python2
+#!/bin/env python3
 import sys
 
 
diff --git a/scripts/nmk/scripts/include.mk b/scripts/nmk/scripts/include.mk
index c1c1e94af..603c322cf 100644
--- a/scripts/nmk/scripts/include.mk
+++ b/scripts/nmk/scripts/include.mk
@@ -20,7 +20,9 @@ ARCH	?= $(shell echo $(SUBARCH) | sed	\
                 -e s/ppc64.*/ppc64/		\
                 -e s/mips.*/mips/		\
                 -e s/sh[234].*/sh/		\
-                -e s/aarch64.*/aarch64/)
+                -e s/aarch64.*/aarch64/		\
+                -e s/riscv64.*/riscv64/		\
+                -e s/loongarch64.*/loongarch64/)
 
 export SUBARCH ARCH
 
diff --git a/scripts/nmk/scripts/main.mk b/scripts/nmk/scripts/main.mk
index 493a164f8..7f11bda23 100644
--- a/scripts/nmk/scripts/main.mk
+++ b/scripts/nmk/scripts/main.mk
@@ -1,7 +1,7 @@
 ifndef ____nmk_defined__main
 
 #
-# Genaral inclusion statement
+# General inclusion statement
 
 ifndef ____nmk_defined__include
         include $(__nmk_dir)include.mk
diff --git a/scripts/nmk/scripts/tools.mk b/scripts/nmk/scripts/tools.mk
index 1681d4e90..de5782c13 100644
--- a/scripts/nmk/scripts/tools.mk
+++ b/scripts/nmk/scripts/tools.mk
@@ -23,7 +23,7 @@ MAKE		:= make
 MKDIR		:= mkdir -p
 AWK		:= awk
 PERL		:= perl
-FULL_PYTHON	:= $(shell which python3 2>/dev/null || which python2 2>/dev/null)
+FULL_PYTHON	:= $(shell command -v python3 2>/dev/null)
 PYTHON		?= $(shell basename $(FULL_PYTHON))
 FIND		:= find
 SH		:= $(shell if [ -x "$$BASH" ]; then echo $$BASH;        \
@@ -36,7 +36,7 @@ CTAGS		:= ctags
 export RM HOSTLD LD HOSTCC CC CPP AS AR STRIP OBJCOPY OBJDUMP
 export NM SH MAKE MKDIR AWK PERL PYTHON SH CSCOPE
 
-export USE_ASCIIDOCTOR ?= $(shell which asciidoctor 2>/dev/null)
+export USE_ASCIIDOCTOR ?= $(shell command -v asciidoctor 2>/dev/null)
 
 #
 # Footer.
diff --git a/scripts/protobuf-gen.sh b/scripts/protobuf-gen.sh
index 0c738f13a..25d2feaeb 100644
--- a/scripts/protobuf-gen.sh
+++ b/scripts/protobuf-gen.sh
@@ -1,15 +1,15 @@
 #!/bin/bash
 
-# shellcheck disable=SC2013,SC1004
-
 TR="y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/"
 
-for x in $(sed -n '/PB_AUTOGEN_START/,/PB_AUTOGEN_STOP/ {
+sed -n '/PB_AUTOGEN_START/,/PB_AUTOGEN_STOP/ {
 		/PB_AUTOGEN_ST/d;
+		/^[ \t]*$/d;
 		s/,.*$//;
 		s/\tPB_//;
 		p;
-	   }' criu/include/protobuf-desc.h); do
+	   }' criu/include/protobuf-desc.h | \
+while IFS= read -r x; do
 	x_la=$(echo "$x" | sed $TR)
 	x_uf=$(echo "$x" | sed -nr 's/^./&#\\\
 /;
diff --git a/scripts/ruff.toml b/scripts/ruff.toml
new file mode 100644
index 000000000..2b0385976
--- /dev/null
+++ b/scripts/ruff.toml
@@ -0,0 +1,4 @@
+# Ignore `F401` (unused imports) in all `__init__.py` files
+[lint.per-file-ignores]
+"__init__.py" = ["F401"]
+
diff --git a/scripts/uninstall_module.py b/scripts/uninstall_module.py
new file mode 100755
index 000000000..2da63c800
--- /dev/null
+++ b/scripts/uninstall_module.py
@@ -0,0 +1,85 @@
+#!/usr/bin/python3
+"""
+`pip uninstall` doesn't support `--prefix`.
+https://github.com/pypa/pip/issues/11213
+"""
+import argparse
+import os
+import shutil
+import site
+import subprocess
+import sys
+
+# With Python 3.13 the subprocess module now uses the `posix_spawn()`
+# function which requires loading the `signal` module:
+#     https://docs.python.org/3/whatsnew/3.13.html#subprocess
+#
+# We need to load this module here, before PYTHONPATH and sys.path
+# have been modified to use the path specified with `--prefix`.
+#
+# flake8: noqa: F401
+import signal
+
+import importlib_metadata
+
+
+def add_site_dir(prefix: str):
+    """
+    Add site directory with prefix to sys.path and update PYTHONPATH.
+    """
+    # If prefix is used, we need to make sure that we
+    # do not uninstall other packages from the system paths.
+    sys.path = []
+    site.PREFIXES = [prefix]
+    pkgs = site.getsitepackages()
+    for path in pkgs:
+        site.addsitedir(path)
+        if 'dist-packages' in path:
+            # Ubuntu / Debian might use both dist- and site- packages.
+            site.addsitedir(path.replace('dist-packages', 'site-packages'))
+    os.environ['PYTHONPATH'] = os.pathsep.join(sys.path)
+
+
+def uninstall_module(package_name: str, prefix=None):
+    """
+    Enable support for '--prefix' with 'pip uninstall'.
+
+    package_name: name of the distribution to uninstall.
+    prefix: optional installation prefix. When set, the package is
+    looked up below that prefix only, and the process exits with
+    status 0 if it is not found there.
+    """
+    dist_info_path = None
+    if prefix:
+        add_site_dir(prefix)
+        try:
+            distribution = next(importlib_metadata.Distribution.discover(name=package_name))
+            dist_info_path = str(distribution._path)
+        except StopIteration:
+            print(f"Skipping {package_name} as it is not installed.")
+            sys.exit(0)
+
+    command = [sys.executable, '-m', 'pip', 'uninstall', '-y', package_name]
+    try:
+        subprocess.check_call(command, env=os.environ)
+        if dist_info_path and os.path.isdir(dist_info_path):
+            # .dist-info files are not cleaned up when the package
+            # has been installed with --prefix.
+            # https://github.com/pypa/pip/issues/5573
+            shutil.rmtree(dist_info_path)
+            if 'dist-packages' in dist_info_path:
+                # The site-packages twin may not exist; removing it
+                # unconditionally would raise FileNotFoundError.
+                twin_path = dist_info_path.replace('dist-packages', 'site-packages')
+                if os.path.isdir(twin_path):
+                    shutil.rmtree(twin_path)
+    except subprocess.CalledProcessError as err:
+        print(f'Error uninstalling package {package_name}: {err}')
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('module_name', help='The name of the module to uninstall')
+    parser.add_argument('--prefix', help='The prefix where the module was installed')
+    args = parser.parse_args()
+    uninstall_module(args.module_name, args.prefix)
diff --git a/soccr/soccr.c b/soccr/soccr.c
index abea93703..8e1ce1c63 100644
--- a/soccr/soccr.c
+++ b/soccr/soccr.c
@@ -781,7 +781,7 @@ int libsoccr_restore(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, unsi
 	return 0;
 }
 
-static int __send_queue(struct libsoccr_sk *sk, int queue, char *buf, __u32 len)
+static int __send_queue(struct libsoccr_sk *sk, const char *queue, char *buf, __u32 len)
 {
 	int ret, err = -1, max_chunk;
 	int off;
@@ -816,7 +816,7 @@ static int __send_queue(struct libsoccr_sk *sk, int queue, char *buf, __u32 len)
 				continue;
 			}
 
-			logerr("Can't restore %d queue data (%d), want (%d:%d:%d)", queue, ret, chunk, len, max_chunk);
+			logerr("Can't restore %s queue data (%d), want (%d-%d:%d:%d)", queue, ret, off, chunk, len, max_chunk);
 			goto err;
 		}
 		off += ret;
@@ -837,7 +837,7 @@ static int send_queue(struct libsoccr_sk *sk, int queue, char *buf, __u32 len)
 		return -1;
 	}
 
-	return __send_queue(sk, queue, buf, len);
+	return __send_queue(sk, queue == TCP_RECV_QUEUE ? "recv" : "send", buf, len);
 }
 
 static int libsoccr_restore_queue(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, unsigned data_size, int queue,
@@ -876,7 +876,7 @@ static int libsoccr_restore_queue(struct libsoccr_sk *sk, struct libsoccr_sk_dat
 			 * they can be restored without any tricks.
 			 */
 			tcp_repair_off(sk->fd);
-			if (__send_queue(sk, TCP_SEND_QUEUE, buf + len, ulen))
+			if (__send_queue(sk, "not-sent send", buf + len, ulen))
 				return -3;
 			if (tcp_repair_on(sk->fd))
 				return -4;
diff --git a/soccr/test/Makefile b/soccr/test/Makefile
index 458540045..499901b0c 100644
--- a/soccr/test/Makefile
+++ b/soccr/test/Makefile
@@ -21,7 +21,6 @@ tcp-conn-v6: tcp-conn-v6.c
 test: tcp-constructor tcp-conn tcp-conn-v6
 	unshare -n sh -c "ip link set up dev lo; ./tcp-conn"
 	unshare -n sh -c "ip link set up dev lo; ./tcp-conn-v6"
-	python run.py ./$(RUN)
+	python3 run.py ./$(RUN)
 
 .PHONY: test
-
diff --git a/soccr/test/run.py b/soccr/test/run.py
index 1ffe58a58..57c556e36 100644
--- a/soccr/test/run.py
+++ b/soccr/test/run.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import sys, os
 import hashlib
diff --git a/soccr/test/tcp-test.py b/soccr/test/tcp-test.py
index ff3fe29dc..b48f532eb 100755
--- a/soccr/test/tcp-test.py
+++ b/soccr/test/tcp-test.py
@@ -1,6 +1,5 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 
-from __future__ import print_function
 import sys, socket
 import hashlib
 
diff --git a/test/Makefile b/test/Makefile
index 8416b1961..0bfdab680 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -12,7 +12,7 @@ all:
 	$(MAKE) zdtm-freezer
 .PHONY: all
 
-TESTS = unix-callback mem-snap rpc libcriu mounts/ext security pipes crit socketpairs overlayfs mnt-ext-dev shell-job
+TESTS = unix-callback mem-snap rpc libcriu mounts/ext security pipes crit socketpairs overlayfs mnt-ext-dev shell-job criu-ns skip-file-rwx-check
 
 other:
 	for t in $(TESTS); do				\
@@ -45,10 +45,6 @@ zdtm-freezer:
 	./zdtm.py run --test zdtm/transition/thread-bomb --pre 3 --freezecg zdtm:f
 .PHONY: zdtm-freezer
 
-fault-injection:
-	$(MAKE) -C fault-injection
-.PHONY: fault-injection
-
 override CFLAGS += -D_GNU_SOURCE
 
 clean_root:
diff --git a/test/check_actions.py b/test/check_actions.py
deleted file mode 100755
index 4973e3938..000000000
--- a/test/check_actions.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import os
-
-actions = set(['pre-dump', 'pre-restore', 'post-dump', 'setup-namespaces', \
-  'post-setup-namespaces', 'post-restore', 'post-resume', \
-  'network-lock', 'network-unlock' ])
-errors = []
-af = os.path.dirname(os.path.abspath(__file__)) + '/actions_called.txt'
-
-for act in open(af):
-    act = act.strip().split()
-    act.append('EMPTY')
-    act.append('EMPTY')
-
-    if act[0] == 'EMPTY':
-        raise Exception("Error in test, bogus actions line")
-
-    if act[1] == 'EMPTY':
-        errors.append('Action %s misses CRTOOLS_IMAGE_DIR' % act[0])
-
-    if act[0] in ('post-dump', 'setup-namespaces', 'post-setup-namespaces', \
-      'post-restore', 'post-resume', 'network-lock', 'network-unlock'):
-        if act[2] == 'EMPTY':
-            errors.append('Action %s misses CRTOOLS_INIT_PID' % act[0])
-        elif not act[2].isdigit() or int(act[2]) == 0:
-            errors.append('Action %s PID is not number (%s)' %
-                          (act[0], act[2]))
-
-    actions -= set([act[0]])
-
-if actions:
-    errors.append('Not all actions called: %r' % actions)
-
-if errors:
-    for x in errors:
-        print(x)
-    sys.exit(1)
-
-print('PASS')
diff --git a/test/crit-recode.py b/test/crit-recode.py
index 4135681e1..f119271d8 100755
--- a/test/crit-recode.py
+++ b/test/crit-recode.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 import pycriu
 import sys
 import os
diff --git a/test/cuda-checkpoint/.gitignore b/test/cuda-checkpoint/.gitignore
new file mode 100644
index 000000000..717fb7028
--- /dev/null
+++ b/test/cuda-checkpoint/.gitignore
@@ -0,0 +1 @@
+cuda-checkpoint
diff --git a/test/cuda-checkpoint/Makefile b/test/cuda-checkpoint/Makefile
new file mode 100644
index 000000000..c59dadddc
--- /dev/null
+++ b/test/cuda-checkpoint/Makefile
@@ -0,0 +1,17 @@
+CFLAGS	+= $(USERCFLAGS) $(ARCHCFLAGS)
+
+BIN := cuda-checkpoint
+SRC := cuda-checkpoint.c
+DEP	:= $(SRC:%.c=%.d)
+OBJ	:= $(SRC:%.c=%.o)
+TARGETS	:= $(BIN)
+
+include ../zdtm/Makefile.inc
+
+all:	$(TARGETS)
+.PHONY: all
+
+clean-more:
+	$(RM) $(TARGETS)
+.PHONY: clean-more
+clean: clean-more
diff --git a/test/cuda-checkpoint/cuda-checkpoint.c b/test/cuda-checkpoint/cuda-checkpoint.c
new file mode 100644
index 000000000..3b7ce8b9f
--- /dev/null
+++ b/test/cuda-checkpoint/cuda-checkpoint.c
@@ -0,0 +1,57 @@
+/* The mocked version of cuda-checkpoint. */
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char *argv[])
+{
+	int c;
+
+	while (1) {
+		int option_index = 0;
+		static struct option long_options[] = {
+			{ "pid", required_argument, 0, 'p' },
+			{ "get-state", no_argument, 0, 's' },
+			{ "get-restore-tid", no_argument, 0, 'g' },
+			{ "action", required_argument, 0, 'a' },
+			{ "timeout", required_argument, 0, 't' },
+			{ "help", no_argument, 0, 'h' },
+			{ 0, 0, 0, 0 }
+		};
+
+		c = getopt_long(argc, argv, "p:ga:ht:",
+				long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'p':
+			printf("%s\n", optarg);
+			break;
+		case 'g':
+		case 'a':
+		case 't':
+			break;
+		case 's':
+			printf("running\n");
+			break;
+		case 'h':
+			printf("--action - execute an action");
+			break;
+
+		default:
+			fprintf(stderr, "getopt returned character code 0%o ??\n", c);
+			return 1;
+		}
+	}
+
+	if (optind < argc) {
+		fprintf(stderr, "non-option ARGV-elements: ");
+		while (optind < argc)
+			fprintf(stderr, "%s ", argv[optind++]);
+		fprintf(stderr, "\n");
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/test/exhaustive/pipe.py b/test/exhaustive/pipe.py
index 7f1c53d34..afe20846a 100755
--- a/test/exhaustive/pipe.py
+++ b/test/exhaustive/pipe.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import argparse
 import os
diff --git a/test/exhaustive/unix.py b/test/exhaustive/unix.py
index 5b4c972cb..689b1fb3a 100755
--- a/test/exhaustive/unix.py
+++ b/test/exhaustive/unix.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import sys
 import os
@@ -462,7 +462,7 @@ fail_desc = {
 
 
 def chk_real_state(st):
-    # Before enything else -- check that we still have
+    # Before anything else -- check that we still have
     # all the sockets at hands
     for sk in st.sockets:
         if not sk.visible:
diff --git a/test/inhfd/memfd.py.checkskip b/test/inhfd/memfd.py.checkskip
index 252778969..32c57d929 100755
--- a/test/inhfd/memfd.py.checkskip
+++ b/test/inhfd/memfd.py.checkskip
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import ctypes
 libc = ctypes.CDLL(None)
 
-# libc may not have memfd_create (e.g., centos on travis)
+# libc may not have memfd_create (e.g., centos)
 libc.memfd_create("test".encode('utf8'), 0)
diff --git a/test/javaTests/pom.xml b/test/javaTests/pom.xml
index faae44d1b..ddb6c89cf 100644
--- a/test/javaTests/pom.xml
+++ b/test/javaTests/pom.xml
@@ -38,7 +38,7 @@
 		<dependency>
 			<groupId>org.testng</groupId>
 			<artifactId>testng</artifactId>
-			<version>6.3.1</version>
+			<version>7.7.0</version>
 		</dependency>
 	</dependencies>
 	<properties>
diff --git a/test/jenkins/actions.sh b/test/jenkins/actions.sh
deleted file mode 100755
index 801904500..000000000
--- a/test/jenkins/actions.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-# Check how crit de/encodes images
-set -e
-source `dirname $0`/criu-lib.sh
-# prep
-rm -f actions_called.txt
-./test/zdtm.py run -t zdtm/static/env00 --script "$(pwd)/test/show_action.sh" || fail
-./test/check_actions.py || fail
-exit 0
diff --git a/test/jenkins/criu-dedup.sh b/test/jenkins/criu-dedup.sh
index 842d218bd..edb1b653d 100755
--- a/test/jenkins/criu-dedup.sh
+++ b/test/jenkins/criu-dedup.sh
@@ -4,7 +4,7 @@
 set -e
 source `dirname $0`/criu-lib.sh
 prep
-./test/zdtm.py run --all --keep-going --report report --parallel 4 -f h --pre 2 --dedup -x maps04 -x maps007 -x maps09 -x maps10 || fail
+./test/zdtm.py run --all --keep-going --report report --parallel 4 -f h --pre 2 --dedup -x maps04 -x maps007 || fail
 
 # Additionally run these tests as they touch a lot of
 # memory and it makes sense to additionally check it
diff --git a/test/jenkins/criu-fault.sh b/test/jenkins/criu-fault.sh
index f41073230..6ee7ce33a 100755
--- a/test/jenkins/criu-fault.sh
+++ b/test/jenkins/criu-fault.sh
@@ -9,7 +9,7 @@ prep
 ./test/zdtm.py run -t zdtm/static/maps00 --fault 3 --report report -f h || fail
 
 # FIXME: fhandles looks broken on btrfs
-cat /proc/self/mountinfo | grep -P "/.* / " | grep -q btrfs || NOBTRFS=$?
+findmnt --noheadings --target . | grep -q btrfs || NOBTRFS=$?
 if [ $NOBTRFS -eq 1 ] ; then
 	./test/zdtm.py run -t zdtm/static/inotify_irmap --fault 128 --pre 2 -f uns || fail
 fi
@@ -39,3 +39,13 @@ fi
 ./test/zdtm.py run -t zdtm/static/fpu03 --fault 134 -f h --norst || fail
 # also check for the main thread corruption
 ./test/zdtm.py run -t zdtm/static/fpu00 --fault 134 -f h --norst || fail
+
+# check set_compel_interrupt_only_mode
+./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:t --fault 137 || fail
+./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:t --fault 137 --norst || fail
+# check set_compel_interrupt_only_mode when test cgroup is frozen
+./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:f --fault 137 || fail
+
+if ./test/zdtm.py run -t zdtm/static/vfork00 --fault 136 --report report -f h ; then
+	fail
+fi
diff --git a/test/jenkins/criu-lazy-migration.pipeline b/test/jenkins/criu-lazy-migration.pipeline
index 2c863f170..45dc2c776 100644
--- a/test/jenkins/criu-lazy-migration.pipeline
+++ b/test/jenkins/criu-lazy-migration.pipeline
@@ -21,7 +21,6 @@ pipeline {
 		stage('Test'){
 			steps {
 				sh './test/jenkins/run_ct sh -c "mount --make-rprivate / && mount --rbind . /mnt && cd /mnt && ./test/jenkins/criu-lazy-migration.sh"'
-				junit 'test/report/criu-testreport*.xml'
 			}
 		}
 	}
diff --git a/test/jenkins/criu-lazy-migration.sh b/test/jenkins/criu-lazy-migration.sh
index b23f31c79..02a212e0d 100755
--- a/test/jenkins/criu-lazy-migration.sh
+++ b/test/jenkins/criu-lazy-migration.sh
@@ -15,7 +15,7 @@ LAZY_MIGRATE_EXCLUDE="-x fifo_loop -x file_locks -x ptrace_sig -x overmount_file
 	       --lazy-migrate $LAZY_EXCLUDE $LAZY_MIGRATE_EXCLUDE || fail
 
 # During pre-dump + lazy-pages we leave VM_NOHUGEPAGE set
-LAZY_EXCLUDE="$LAZY_EXCLUDE -x maps02 -x maps09 -x maps10"
+LAZY_EXCLUDE="$LAZY_EXCLUDE -x maps02"
 
 # lazy restore from images with pre-dumps
 ./test/zdtm.py run --all --keep-going --report report --parallel 4 -f uns \
diff --git a/test/jenkins/criu-lazy-pages.sh b/test/jenkins/criu-lazy-pages.sh
index f62912090..9ef721739 100755
--- a/test/jenkins/criu-lazy-pages.sh
+++ b/test/jenkins/criu-lazy-pages.sh
@@ -12,7 +12,7 @@ source `dirname $0`/criu-lazy-common.sh
 	       --lazy-pages $LAZY_EXCLUDE || fail
 
 # During pre-dump + lazy-pages we leave VM_NOHUGEPAGE set
-LAZY_EXCLUDE="$LAZY_EXCLUDE -x maps02 -x maps09 -x maps10"
+LAZY_EXCLUDE="$LAZY_EXCLUDE -x maps02"
 
 # lazy restore from images with pre-dumps
 ./test/zdtm.py run --all --keep-going --report report --parallel 4 \
diff --git a/test/jenkins/criu-pre-dump.sh b/test/jenkins/criu-pre-dump.sh
index b2972d941..137f7c23f 100755
--- a/test/jenkins/criu-pre-dump.sh
+++ b/test/jenkins/criu-pre-dump.sh
@@ -5,6 +5,5 @@ set -e
 source `dirname $0`/criu-lib.sh
 prep
 mount_tmpfs_to_dump
-# FIXME: https://github.com/checkpoint-restore/criu/issues/1868
-./test/zdtm.py run --all --keep-going --report report --parallel 4 --pre 3 -x 'maps04' -x 'maps09' -x 'maps10' || fail
-./test/zdtm.py run --all --keep-going --report report --parallel 4 --pre 3 --page-server -x 'maps04' -x 'maps09' -x 'maps10' || fail
+./test/zdtm.py run --all --keep-going --report report --parallel 4 --pre 3 -x 'maps04' || fail
+./test/zdtm.py run --all --keep-going --report report --parallel 4 --pre 3 --page-server -x 'maps04' || fail
diff --git a/test/jenkins/criu-remote-lazy-pages.sh b/test/jenkins/criu-remote-lazy-pages.sh
index 48787f3f6..1c677e333 100755
--- a/test/jenkins/criu-remote-lazy-pages.sh
+++ b/test/jenkins/criu-remote-lazy-pages.sh
@@ -12,7 +12,7 @@ source `dirname $0`/criu-lazy-common.sh
 	       --remote-lazy-pages $LAZY_EXCLUDE -x maps04 || fail
 
 # During pre-dump + lazy-pages we leave VM_NOHUGEPAGE set
-LAZY_EXCLUDE="$LAZY_EXCLUDE -x maps02  -x maps09 -x maps10"
+LAZY_EXCLUDE="$LAZY_EXCLUDE -x maps02"
 
 # lazy restore from "remote" dump with pre-dumps
 ./test/zdtm.py run --all --keep-going --report report --parallel 4 \
diff --git a/test/jenkins/criu-snap.sh b/test/jenkins/criu-snap.sh
index d8fdf02b3..b08c57f52 100755
--- a/test/jenkins/criu-snap.sh
+++ b/test/jenkins/criu-snap.sh
@@ -5,5 +5,5 @@ set -e
 source `dirname $0`/criu-lib.sh
 prep
 mount_tmpfs_to_dump
-./test/zdtm.py run --all --keep-going --report report --parallel 4 --pre 3 --snaps -x 'maps04' -x 'maps09' -x 'maps10' || fail
-./test/zdtm.py run --all --keep-going --report report --parallel 4 --pre 3 --snaps --page-server -x 'maps04' -x 'maps09' -x 'maps10' || fail
+./test/zdtm.py run --all --keep-going --report report --parallel 4 --pre 3 --snaps -x 'maps04' || fail
+./test/zdtm.py run --all --keep-going --report report --parallel 4 --pre 3 --snaps --page-server -x 'maps04' || fail
diff --git a/test/libfault/Makefile b/test/libfault/Makefile
new file mode 100644
index 000000000..cbe47fdf2
--- /dev/null
+++ b/test/libfault/Makefile
@@ -0,0 +1,25 @@
+# Builds libfault.so, a shared library whose pread64() wrapper returns short reads.
+CC = gcc
+CFLAGS = -c -fPIC
+# dlsym() lives in libdl on glibc older than 2.34, so link against it explicitly.
+# Libraries only take effect at link time; -ldl next to -c would be ignored.
+LDLIBS = -ldl
+
+SRC = libfault.c
+OBJ = $(SRC:.c=.o)
+
+LIB = libfault.so
+
+.PHONY: all clean run
+
+all: $(LIB)
+
+$(LIB): $(OBJ)
+	$(CC) -shared -o $(LIB) $(OBJ) $(LDLIBS)
+
+$(OBJ): $(SRC)
+	$(CC) $(CFLAGS) $<
+
+clean:
+	rm -f $(OBJ) $(LIB)
+
diff --git a/test/libfault/libfault.c b/test/libfault/libfault.c
new file mode 100644
index 000000000..650bf08ca
--- /dev/null
+++ b/test/libfault/libfault.c
@@ -0,0 +1,31 @@
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <dlfcn.h>
+#include <errno.h>
+
+ssize_t (*original_pread)(int fd, void *buf, size_t count, off_t offset) = NULL;
+
+/**
+ * This function is a wrapper around pread() that is used for testing CRIU's
+ * handling of cases where pread() returns less data than requested.
+ *
+ * pmc_fill() in criu/pagemap.c is a good example of where this can happen.
+ */
+ssize_t pread64(int fd, void *buf, size_t count, off_t offset)
+{
+	if (!original_pread) {
+		original_pread = dlsym(RTLD_NEXT, "pread");
+		if (!original_pread) {
+			errno = EIO;
+			return -1;
+		}
+	}
+
+	/* The following aims to simulate the case when pread() returns less
+	 * data than requested. We need to ensure that CRIU handles such cases. */
+	if (count > 2048) {
+		count -= 1024;
+	}
+
+	return original_pread(fd, buf, count, offset);
+}
diff --git a/test/others/action-script/.gitignore b/test/others/action-script/.gitignore
new file mode 100644
index 000000000..ca9a0b541
--- /dev/null
+++ b/test/others/action-script/.gitignore
@@ -0,0 +1 @@
+actions_called.txt
diff --git a/test/others/action-script/Makefile b/test/others/action-script/Makefile
new file mode 100644
index 000000000..594edc070
--- /dev/null
+++ b/test/others/action-script/Makefile
@@ -0,0 +1,3 @@
+run:
+	./run.sh
+.PHONY: run
diff --git a/test/others/action-script/check_actions.py b/test/others/action-script/check_actions.py
new file mode 100755
index 000000000..0140d8762
--- /dev/null
+++ b/test/others/action-script/check_actions.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+
+EXPECTED_ACTIONS = [
+    'pre-dump',
+    'network-lock',
+    'post-dump',
+    'pre-restore',
+    'setup-namespaces',
+    'post-setup-namespaces',
+    'post-restore',
+    'network-unlock',
+    'pre-resume',
+    'post-resume',
+]
+
+errors = []
+actions_called = []
+actions_called_file = os.path.join(os.path.dirname(__file__), 'actions_called.txt')
+
+with open(actions_called_file) as f:
+    for index, line in enumerate(f):
+        parts = line.strip().split()
+        parts += ['EMPTY'] * (3 - len(parts))
+        action_hook, image_dir, pid = parts
+
+        if action_hook == 'EMPTY':
+            raise ValueError("Error in test: bogus actions line")
+
+        expected_action = EXPECTED_ACTIONS[index] if index < len(EXPECTED_ACTIONS) else None
+        if action_hook != expected_action:
+            raise ValueError(f"Invalid action: {action_hook} != {expected_action}")
+
+        if image_dir == 'EMPTY':
+            errors.append(f'Action {action_hook} misses CRTOOLS_IMAGE_DIR')
+
+        if action_hook != 'pre-restore':
+            if pid == 'EMPTY':
+                errors.append(f'Action {action_hook} misses CRTOOLS_INIT_PID')
+            elif not pid.isdigit() or int(pid) == 0:
+                errors.append(f'Action {action_hook} PID is not a valid number ({pid})')
+
+        actions_called.append(action_hook)
+
+if actions_called != EXPECTED_ACTIONS:
+    errors.append(f'Not all actions called: {actions_called!r}')
+
+if errors:
+    print('\n'.join(errors))
+    sys.exit(1)
+
+print('Check Actions PASS')
diff --git a/test/others/action-script/run.sh b/test/others/action-script/run.sh
new file mode 100755
index 000000000..574f6fc86
--- /dev/null
+++ b/test/others/action-script/run.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+
+rm -f "${SCRIPT_DIR}"/actions_called.txt
+"${SCRIPT_DIR}"/../../zdtm.py run -t zdtm/static/env00 -f ns --script "$SCRIPT_DIR/show_action.sh" || exit 1
+"${SCRIPT_DIR}"/check_actions.py || exit 1
+
+exit 0
diff --git a/test/show_action.sh b/test/others/action-script/show_action.sh
similarity index 66%
rename from test/show_action.sh
rename to test/others/action-script/show_action.sh
index 86468b67a..afbfc3f27 100755
--- a/test/show_action.sh
+++ b/test/others/action-script/show_action.sh
@@ -1,3 +1,4 @@
 #!/bin/bash
+
 echo "${CRTOOLS_SCRIPT_ACTION} ${CRTOOLS_IMAGE_DIR} ${CRTOOLS_INIT_PID}" \
-	     >> "$(dirname $0)/actions_called.txt"
+	>> "$(dirname "$0")/actions_called.txt"
diff --git a/test/others/app-emu/java/HelloWorld/run.sh b/test/others/app-emu/java/HelloWorld/run.sh
index 0ed6afd14..e6dcbd9fc 100644
--- a/test/others/app-emu/java/HelloWorld/run.sh
+++ b/test/others/app-emu/java/HelloWorld/run.sh
@@ -18,7 +18,7 @@ setsid java HelloWorld &
 
 pid=${!}
 
-echo Lanuched java application with pid $pid in background
+echo Launched java application with pid $pid in background
 
 ${criu} dump -D dump -o dump.log -v4 --shell-job -t ${pid} || {
 	echo "Dump failed"
diff --git a/test/others/app-emu/make/run.sh b/test/others/app-emu/make/run.sh
index 7cb44c770..d871b7d9c 100644
--- a/test/others/app-emu/make/run.sh
+++ b/test/others/app-emu/make/run.sh
@@ -28,7 +28,7 @@ setsid make -j4 &
 
 pid=${!}
 
-echo Lanuched make in $pid background
+echo Launched make in $pid background
 sleep 2
 
 ${criu} dump --shell-job -D dump -o dump.log -v4  -t ${pid} || {
diff --git a/test/others/bers/bers.c b/test/others/bers/bers.c
index 37cf84dd3..b291e3bcb 100644
--- a/test/others/bers/bers.c
+++ b/test/others/bers/bers.c
@@ -391,7 +391,7 @@ usage:
 	pr_msg("    -f|--files <num>         create <num> files for each task\n");
 	pr_msg("    -m|--memory <num>        allocate <num> megabytes for each task\n");
 	pr_msg("    --memory-chunks <num>    split memory to <num> equal parts\n");
-	pr_msg("    --mem-fill <mode>        fill memory with data dependin on <mode>:\n");
+	pr_msg("    --mem-fill <mode>        fill memory with data depending on <mode>:\n");
 	pr_msg("                all          fill every byte of memory\n");
 	pr_msg("                light        fill first bytes of every page\n");
 	pr_msg("                dirtify      fill every page\n");
diff --git a/test/others/config-file/run.sh b/test/others/config-file/run.sh
index 92195883e..26b835b45 100755
--- a/test/others/config-file/run.sh
+++ b/test/others/config-file/run.sh
@@ -11,7 +11,7 @@
 
 set -xbm
 
-#shellcheck disable=SC1091
+# shellcheck source=test/others/env.sh
 source ../env.sh
 
 if [ ! -d /etc/criu ]; then
diff --git a/test/others/crit/test.sh b/test/others/crit/test.sh
index 0d38043d7..2698bbd3c 100755
--- a/test/others/crit/test.sh
+++ b/test/others/crit/test.sh
@@ -1,11 +1,12 @@
 #!/bin/bash
-# shellcheck disable=SC1091,SC2002
+# shellcheck disable=SC2002
 
 set -x
 
+# shellcheck source=test/others/env.sh
 source ../env.sh
 
-images_list=""
+images_list=()
 
 function gen_imgs {
 	PID=$(../loop)
@@ -16,15 +17,15 @@ function gen_imgs {
 		exit 1
 	fi
 
-	images_list=$(ls -1 ./*.img)
-	if [ -z "$images_list" ]; then
+	images_list=(./*.img)
+	if [ ! -e "${images_list[0]}" ]; then # an unmatched glob stays as the literal pattern
 		echo "Failed to generate images"
 		exit 1
 	fi
 }
 
 function run_test1 {
-	for x in $images_list
+	for x in "${images_list[@]}"
 	do
 		echo "=== $x"
 		if [[ $x == *pages* ]]; then
@@ -45,15 +46,16 @@ function run_test1 {
 
 
 function run_test2 {
-	mapfile -t array <<< "$images_list"
-
-	PROTO_IN=${array[0]}
+	PROTO_IN="${images_list[0]}"
 	JSON_IN=$(mktemp -p ./ tmp.XXXXXXXXXX.json)
 	OUT=$(mktemp -p ./ tmp.XXXXXXXXXX.log)
 
 	# prepare
 	${CRIT} decode -i "${PROTO_IN}" -o "${JSON_IN}"
 
+	# show info about image
+	${CRIT} info "${PROTO_IN}"
+
 	# proto in - json out decode
 	cat "${PROTO_IN}" | ${CRIT} decode || exit 1
 	cat "${PROTO_IN}" | ${CRIT} decode -o "${OUT}" || exit 1
@@ -99,6 +101,8 @@ function run_test2 {
 	${CRIT} x ./ rss || exit 1
 }
 
+${CRIT} --version
+
 gen_imgs
 run_test1
 run_test2
diff --git a/test/others/criu-coredump/test.sh b/test/others/criu-coredump/test.sh
index dd774e298..2be82e64c 100755
--- a/test/others/criu-coredump/test.sh
+++ b/test/others/criu-coredump/test.sh
@@ -1,11 +1,11 @@
 #!/bin/bash
 
 set -x
-# shellcheck disable=SC1091
+# shellcheck source=test/others/env.sh
 source ../env.sh || exit 1
 
 function gen_imgs {
-	PID=$(../loop)
+	PID=$(../loop with a very very very very very very very very very very very very long cmdline)
 	if ! $CRIU dump -v4 -o dump.log -D ./ -t "$PID"; then
 		echo "Failed to checkpoint process $PID"
 		cat dump.log
@@ -43,5 +43,12 @@ function run_test {
 	echo "= done"
 }
 
+UNAME_M=$(uname -m)
+
+if [[ "$UNAME_M" != "aarch64" && "$UNAME_M" != "armv7l" &&"$UNAME_M" != "x86_64" ]]; then
+	echo "criu-coredump only supports aarch64 armv7l, and x86_64. skipping."
+	exit 0
+fi
+
 gen_imgs
 run_test
diff --git a/test/others/criu-ns/Makefile b/test/others/criu-ns/Makefile
new file mode 100644
index 000000000..4d901a111
--- /dev/null
+++ b/test/others/criu-ns/Makefile
@@ -0,0 +1,5 @@
+# Build zdtm_ct in the test/ directory, then run run.py under it.
+run:
+	@$(MAKE) -C ../.. zdtm_ct
+	../../zdtm_ct run.py
+.PHONY: run
diff --git a/test/others/criu-ns/run.py b/test/others/criu-ns/run.py
new file mode 100755
index 000000000..0a36438e8
--- /dev/null
+++ b/test/others/criu-ns/run.py
@@ -0,0 +1,245 @@
+#!/usr/bin/env python3
+
+import fcntl
+import os
+import pathlib
+import pty
+import shutil
+import subprocess
+import sys
+import termios
+import time
+
+
+CRIU_BIN = "../../../criu/criu"
+CRIU_NS = "../../../scripts/criu-ns"
+IMG_DIR = "dumpdir"
+DUMP_LOG = "dump.log"
+RESTORE_LOG = "restore.log"
+PIDFILE = "pidfile"
+
+
+def check_dumpdir(path=IMG_DIR):
+    if os.path.isdir(path):
+        shutil.rmtree(path)
+    os.mkdir(path, 0o755)
+
+
+def run_task_with_own_pty(task):
+    """Run task() in a forked child that has a fresh pty as controlling tty.
+
+    Whatever the child writes to the pty is echoed to our stdout. Calls
+    exit(1) when the child's wait status is non-zero, returns 0 otherwise.
+    """
+    fd_m, fd_s = pty.openpty()
+
+    pid = os.fork()
+    if pid == 0:
+        os.close(fd_m)
+        os.setsid()
+        os.dup2(fd_s, 0)
+        os.dup2(fd_s, 1)
+        os.dup2(fd_s, 2)
+        fcntl.ioctl(fd_s, termios.TIOCSCTTY, 1)
+        os.close(fd_s)
+        task()
+        exit(0)
+
+    os.close(fd_s)
+
+    # Read with blocking calls so the loop sleeps inside read() while the
+    # child is quiet, and print every chunk exactly once.
+    while True:
+        try:
+            data = os.read(fd_m, 4096)
+        except OSError:
+            # Linux reports EIO on the master once all slave fds are closed.
+            break
+        if not data:
+            break
+        print(data.decode("utf-8", "replace"))
+
+    os.close(fd_m)
+
+    _, status = os.waitpid(pid, 0)
+
+    if status != 0:
+        print("task %s exited badly: %d" % (task.__name__, status))
+        exit(1)
+
+    return 0
+
+
+def create_pty():
+    fd_m, fd_s = pty.openpty()
+    return (os.fdopen(fd_m, "wb"), os.fdopen(fd_s, "wb"))
+
+
+def create_isolated_dumpee():
+    pathlib.Path("running").touch()
+    fd_m, fd_s = create_pty()
+    pid = os.fork()
+    if pid == 0:
+        os.setsid()
+        os.dup2(fd_s.fileno(), 0)
+        os.dup2(fd_s.fileno(), 1)
+        os.dup2(fd_s.fileno(), 2)
+        fcntl.ioctl(fd_s.fileno(), termios.TIOCSCTTY, 1)
+        while True:
+            if not os.access("running", os.F_OK):
+                sys.exit(0)
+            time.sleep(1)
+    fd_m.close()
+    fd_s.close()
+    return pid
+
+
+def criu_ns_dump(pid, shell_job=False):
+    cmd = [CRIU_NS, "dump", "-D", IMG_DIR, "-v4", "-t", str(pid),
+           "--log-file", DUMP_LOG, "--criu-binary", CRIU_BIN]
+    if shell_job:
+        cmd.append("--shell-job")
+    ret = subprocess.Popen(cmd).wait()
+    return ret
+
+
+def criu_ns_restore(shell_job=False, restore_detached=False):
+    cmd = [CRIU_NS, "restore", "-D", IMG_DIR, "-v4", "--log-file",
+           RESTORE_LOG, "--criu-binary", CRIU_BIN]
+    if shell_job:
+        cmd.append("--shell-job")
+    if restore_detached:
+        cmd += ["--restore-detached", "--pidfile", PIDFILE]
+    ret = subprocess.Popen(cmd).wait()
+    return ret
+
+
+def read_log_file(filename):
+    logfile_path = os.path.join(IMG_DIR, filename)
+    with open(logfile_path) as logfile:
+        print(logfile.read())
+
+
+def test_dump_and_restore_with_shell_job():
+    print("Test criu-ns dump and restore with --shell-job option")
+    check_dumpdir()
+    pathlib.Path("running").touch()
+    pid = os.fork()
+    if pid == 0:
+        while True:
+            if not os.access("running", os.F_OK):
+                sys.exit(0)
+            time.sleep(1)
+
+    ret = criu_ns_dump(pid, shell_job=True)
+    if ret != 0:
+        read_log_file(DUMP_LOG)
+        sys.exit(ret)
+
+    os.unlink("running")
+    fd_m, fd_s = create_pty()
+    pid = os.fork()
+    if pid == 0:
+        os.setsid()
+        fd_m.close()
+        # since criu-ns takes control of the tty stdin
+        os.dup2(fd_s.fileno(), 0)
+        ret = criu_ns_restore(shell_job=True)
+        if ret != 0:
+            read_log_file(RESTORE_LOG)
+            sys.exit(ret)
+        os._exit(0)
+    fd_s.close()
+    if os.waitpid(pid, 0)[1] != 0:  # propagate a failed restore to the caller
+        sys.exit(1)
+
+
+def test_dump_and_restore_without_shell_job(restore_detached=False):
+    print("Test criu-ns dump and restore with an isolated process"
+          "(%d)" % restore_detached)
+    check_dumpdir()
+    pid = create_isolated_dumpee()
+    ret = criu_ns_dump(pid)
+    if ret != 0:
+        read_log_file(DUMP_LOG)
+        sys.exit(ret)
+
+    if not restore_detached:
+        os.unlink("running")
+    pid = os.fork()
+    if pid == 0:
+        os.setsid()
+        ret = criu_ns_restore(restore_detached=restore_detached)
+        if ret != 0:
+            read_log_file(RESTORE_LOG)
+            sys.exit(ret)
+        os._exit(0)
+
+    if os.waitpid(pid, 0)[1] != 0:  # propagate a failed restore to the caller
+        sys.exit(1)
+
+
+def test_dump_and_restore_in_pidns():
+    if os.system("grep NSpid /proc/self/status"):
+        return
+
+    print("Test criu-ns dump and restore in namespaces")
+
+    def _dump():
+        pid = create_isolated_dumpee()
+        ret = criu_ns_dump(pid)
+        if ret != 0:
+            read_log_file(DUMP_LOG)
+            sys.exit(ret)
+
+    def _restore():
+        ret = criu_ns_restore(restore_detached=True)
+        if ret != 0:
+            read_log_file(RESTORE_LOG)
+            sys.exit(ret)
+
+    def _get_restored_pid():
+        restored_pid = 0
+        pidfile_path = os.path.join(IMG_DIR, PIDFILE)
+        if not os.path.exists(pidfile_path):
+            raise FileNotFoundError("pidfile not found")
+        with open(pidfile_path, "r") as pidfile:
+            restored_pid = pidfile.read().strip()
+        return int(restored_pid)
+
+    def _redump():
+        global IMG_DIR
+        try:
+            restored_pid = _get_restored_pid()
+        except FileNotFoundError:
+            sys.exit(1)
+        IMG_DIR = "dumpdir2"
+        check_dumpdir(IMG_DIR)
+        ret = criu_ns_dump(restored_pid)
+        if ret != 0:
+            read_log_file(DUMP_LOG)
+            sys.exit(ret)
+
+    def _re_restore():
+        os.unlink("running")
+        ret = criu_ns_restore()
+        if ret != 0:
+            read_log_file(RESTORE_LOG)
+            sys.exit(ret)
+
+    check_dumpdir()
+    _dump()
+    _restore()
+    _redump()
+    _re_restore()
+
+
+def main():
+    test_dump_and_restore_with_shell_job()
+    test_dump_and_restore_without_shell_job()
+    test_dump_and_restore_without_shell_job(restore_detached=True)
+    test_dump_and_restore_in_pidns()
+
+
+if __name__ == "__main__":
+    run_task_with_own_pty(main)
diff --git a/test/others/env.sh b/test/others/env.sh
index 45066f760..6fa2c9691 100755
--- a/test/others/env.sh
+++ b/test/others/env.sh
@@ -1,17 +1,13 @@
 #!/bin/sh
 
-CRIU=$(readlink -f `dirname ${BASH_SOURCE[0]}`/../../criu/criu)
+BASE_DIR="$(readlink -f "$(dirname "${BASH_SOURCE[0]}")/../../")"
+
+CRIU="${BASE_DIR}/criu/criu"
 criu=$CRIU
-if [ $(which python3) ]; then
-	PYTHON=python3
-elif [ $(which python2) ]; then
-	PYTHON=python2
-else
-	echo "FAIL: Neither python3 nor python2"
-	exit 1
-fi
-#export PYTHON
-CRIT=$(readlink -f `dirname ${BASH_SOURCE[0]}`/../../crit/crit-"${PYTHON}")
+
+export PYTHONPATH="${BASE_DIR}/lib:${BASE_DIR}/crit:${PYTHONPATH-}"
+CRIT="python3 -m crit"
 crit=$CRIT
-CRIU_COREDUMP=$(readlink -f `dirname ${BASH_SOURCE[0]}`/../../coredump/coredump-"${PYTHON}")
+
+CRIU_COREDUMP="${BASE_DIR}/coredump/coredump"
 criu_coredump=$CRIU_COREDUMP
diff --git a/test/others/ext-tty/run.py b/test/others/ext-tty/run.py
index 8109033cb..2c268a2c8 100755
--- a/test/others/ext-tty/run.py
+++ b/test/others/ext-tty/run.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 import subprocess
 import os, sys, time, signal, pty
 
diff --git a/test/others/libcriu/.gitignore b/test/others/libcriu/.gitignore
index 0f6e52bb4..30a56999c 100644
--- a/test/others/libcriu/.gitignore
+++ b/test/others/libcriu/.gitignore
@@ -8,3 +8,5 @@ test_pre_dump
 test_feature_check
 output/
 libcriu.so.*
+test_rpc_config
+test_check
diff --git a/test/others/libcriu/Makefile b/test/others/libcriu/Makefile
index ae7330533..927f17c23 100644
--- a/test/others/libcriu/Makefile
+++ b/test/others/libcriu/Makefile
@@ -3,10 +3,12 @@ include ../../../../criu/Makefile.versions
 TESTS += test_sub
 TESTS += test_self
 TESTS += test_notify
+TESTS += test_rpc_config
 TESTS += test_iters
 TESTS += test_errno
 TESTS += test_join_ns
 TESTS += test_pre_dump
+TESTS += test_check
 TESTS += test_feature_check
 
 all: $(TESTS)
diff --git a/test/others/libcriu/run.sh b/test/others/libcriu/run.sh
index 77bdfb87e..6b36d4496 100755
--- a/test/others/libcriu/run.sh
+++ b/test/others/libcriu/run.sh
@@ -9,7 +9,7 @@ TEST_LOG="${TEST_DIR}/test.log"
 DUMP_LOG="${TEST_DIR}/dump.log"
 RESTORE_LOG="${TEST_DIR}/restore.log"
 
-# shellcheck disable=1091
+# shellcheck source=test/others/env.sh
 source "${MAIN_DIR}/../env.sh" || exit 1
 
 echo "== Clean"
@@ -55,6 +55,7 @@ run_test() {
 run_test test_sub
 run_test test_self
 run_test test_notify
+run_test test_rpc_config
 if [ "$(uname -m)" = "x86_64" ]; then
 	# Skip this on aarch64 as aarch64 has no dirty page tracking
 	run_test test_iters
@@ -62,6 +63,7 @@ if [ "$(uname -m)" = "x86_64" ]; then
 fi
 run_test test_errno
 run_test test_join_ns
+run_test test_check
 if criu check --feature mem_dirty_track > /dev/null; then
 	export CRIU_FEATURE_MEM_TRACK=1
 fi
diff --git a/test/others/libcriu/test_check.c b/test/others/libcriu/test_check.c
new file mode 100644
index 000000000..4af3b3630
--- /dev/null
+++ b/test/others/libcriu/test_check.c
@@ -0,0 +1,24 @@
+#include <stdio.h>
+#include "criu.h"
+#include "lib.h"
+
+/*
+ * Calls criu_check() with the CRIU service binary given in argv[1].
+ * Returns 0 when criu_check() reports success and non-zero otherwise.
+ */
+int main(int argc, char **argv)
+{
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s <criu-binary>\n", argv[0]);
+		return 1;
+	}
+
+	printf("--- Start check ---\n");
+	criu_init_opts();
+	criu_set_service_binary(argv[1]);
+
+	if (criu_check())
+		return -1;
+
+	return 0;
+}
diff --git a/test/others/libcriu/test_rpc_config.c b/test/others/libcriu/test_rpc_config.c
new file mode 100644
index 000000000..529f13637
--- /dev/null
+++ b/test/others/libcriu/test_rpc_config.c
@@ -0,0 +1,284 @@
+#include "criu.h"
+#include "lib.h"
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <string.h>
+#include <time.h>
+
+#define RANDOM_NAME_LEN 6
+#define PATH_BUF_SIZE	128
+
+static volatile sig_atomic_t stop = 0;
+static char base_name[RANDOM_NAME_LEN + 1];
+static char log_file[PATH_BUF_SIZE];
+static char conf_file[PATH_BUF_SIZE];
+
+/* SIGUSR1 handler: tells the loop in the child process to terminate. */
+static void handle_signal(int sig)
+{
+	(void)sig;
+	stop = 1;
+}
+
+/*
+ * Fill base_name with random alphanumeric characters and derive from it the
+ * paths of the config file and of the log file named inside that config file.
+ */
+static void generate_random_base_name(void)
+{
+	const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+	size_t charset_len;
+	int i;
+
+	charset_len = sizeof(charset) - 1;
+
+	for (i = 0; i < RANDOM_NAME_LEN; i++) {
+		base_name[i] = charset[rand() % charset_len];
+	}
+	base_name[i] = '\0';
+
+	snprintf(log_file, sizeof(log_file), "/tmp/criu-%s.log", base_name);
+	snprintf(conf_file, sizeof(conf_file), "/tmp/criu-%s.conf", base_name);
+}
+
+/*
+ * Create the CRIU config file containing a single "log-file=<log_file>" line.
+ * O_EXCL guarantees that an already existing file is never reused.
+ *
+ * Returns 0 on success and -1 on failure. A file created here is removed
+ * again if it could not be written completely.
+ */
+static int create_criu_config_file(void)
+{
+	int fd;
+	FILE *fp;
+	int failed;
+
+	srand(time(NULL));
+	generate_random_base_name();
+
+	fd = open(conf_file, O_CREAT | O_EXCL | O_WRONLY, 0600);
+	if (fd < 0) {
+		perror("Failed to create config file");
+		return -1;
+	}
+
+	fp = fdopen(fd, "w");
+	if (!fp) {
+		perror("fdopen failed");
+		close(fd);
+		unlink(conf_file);
+		return -1;
+	}
+
+	failed = fprintf(fp, "log-file=%s\n", log_file) < 0;
+	/* fclose() flushes the stream, so a write error may only surface here. */
+	if (fclose(fp) != 0)
+		failed = 1;
+	if (failed) {
+		perror("Failed to write config file");
+		unlink(conf_file);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Check that a non-empty log exists at the path named in the config file and
+ * remove it, so that the next check cannot be satisfied by a stale log.
+ *
+ * Returns 0 if the log exists and is non-empty, -1 otherwise.
+ */
+static int check_log_file(void)
+{
+	struct stat st;
+
+	if (stat(log_file, &st) < 0) {
+		perror("Log file does not exist");
+		return -1;
+	}
+
+	if (st.st_size == 0) {
+		fprintf(stderr, "Log file is empty\n");
+		return -1;
+	}
+
+	unlink(log_file);
+	return 0;
+}
+
+/*
+ * Usage: test_rpc_config <criu-binary> <images-dir>
+ *
+ * Dumps and then restores a small looping child. Both requests carry an RPC
+ * config file that sets log-file as well as an explicit log file name, and
+ * the test passes only if the log shows up at the path from the config file.
+ */
+int main(int argc, char **argv)
+{
+	int exit_code = EXIT_FAILURE;
+	int have_conf = 0;
+	pid_t pid = -1; /* > 0 only while a child still has to be reaped */
+	int pipe_fd[2];
+	char eof_probe;
+	int child_ret;
+	int img_fd;
+	int status;
+	int ret;
+
+	if (argc < 3) {
+		fprintf(stderr, "Usage: %s <criu-binary> <images-dir>\n", argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	img_fd = open(argv[2], O_DIRECTORY);
+	if (img_fd < 0) {
+		perror("Failed to open images directory");
+		return EXIT_FAILURE;
+	}
+
+	if (create_criu_config_file() < 0) {
+		printf("Failed to create config file\n");
+		goto cleanup;
+	}
+	have_conf = 1;
+
+	if (pipe(pipe_fd) < 0) {
+		perror("pipe");
+		goto cleanup;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		perror("fork failed");
+		close(pipe_fd[0]);
+		close(pipe_fd[1]);
+		goto cleanup;
+	}
+
+	if (pid == 0) {
+		/** child process **/
+		printf("   `- loop: initializing\n");
+
+		if (setsid() < 0 || signal(SIGUSR1, handle_signal) == SIG_ERR) {
+			_exit(EXIT_FAILURE);
+		}
+
+		close(STDIN_FILENO);
+		close(STDOUT_FILENO);
+		close(STDERR_FILENO);
+		close(pipe_fd[0]);
+
+		child_ret = SUCC_ECODE;
+		if (write(pipe_fd[1], &child_ret, sizeof(child_ret)) != sizeof(child_ret))
+			_exit(EXIT_FAILURE);
+		close(pipe_fd[1]);
+
+		while (!stop) {
+			sleep(1);
+		}
+
+		_exit(SUCC_ECODE);
+	}
+
+	/** parent process **/
+	close(pipe_fd[1]);
+
+	ret = -1;
+	if (read(pipe_fd[0], &ret, sizeof(ret)) != sizeof(ret) || ret != SUCC_ECODE) {
+		printf("Error starting loop\n");
+		close(pipe_fd[0]);
+		goto cleanup;
+	}
+
+	/* Wait for EOF, i.e. until the child has closed its end of the pipe. */
+	if (read(pipe_fd[0], &eof_probe, 1) < 0)
+		perror("read");
+	close(pipe_fd[0]);
+
+	printf("--- Loop process started (pid: %d) ---\n", pid);
+
+	printf("--- Checkpoint ---\n");
+	criu_init_opts();
+	criu_set_service_binary(argv[1]);
+	criu_set_images_dir_fd(img_fd);
+	criu_set_pid(pid);
+	criu_set_log_level(CRIU_LOG_DEBUG);
+
+	/* The RPC config file should overwrite the log-file set below */
+	printf("Setting dump RPC config file: %s\n", conf_file);
+	criu_set_config_file(conf_file);
+	criu_set_log_file("dump.log");
+
+	ret = criu_dump();
+	if (ret < 0) {
+		what_err_ret_mean(ret);
+		printf("criu dump failed\n");
+		goto cleanup;
+	}
+
+	printf("   `- Dump succeeded\n");
+	waitpid(pid, NULL, 0);
+	pid = -1;
+
+	if (check_log_file()) {
+		printf("Error: log file not overwritten by RPC config file\n");
+		goto cleanup;
+	}
+
+	printf("--- Restore loop ---\n");
+	criu_init_opts();
+	criu_set_images_dir_fd(img_fd);
+	criu_set_log_level(CRIU_LOG_DEBUG);
+
+	/* The RPC config file should overwrite the log-file set below */
+	printf("Setting restore RPC config file: %s\n", conf_file);
+	criu_set_config_file(conf_file);
+	criu_set_log_file("restore.log");
+
+	pid = criu_restore_child();
+	if (pid <= 0) {
+		what_err_ret_mean(pid);
+		pid = -1;
+		goto cleanup;
+	}
+
+	printf("   `- Restore returned pid %d\n", pid);
+	kill(pid, SIGUSR1);
+
+	if (check_log_file()) {
+		printf("Error: log file not overwritten by RPC config file\n");
+		goto cleanup;
+	}
+
+	if (waitpid(pid, &status, 0) < 0) {
+		perror("waitpid failed");
+		pid = -1;
+		goto cleanup;
+	}
+	pid = -1;
+	exit_code = chk_exit(status, SUCC_ECODE);
+
+cleanup:
+	if (pid > 0) {
+		/* Failure path: the child must not outlive the test or stay a zombie. */
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+	}
+
+	if (have_conf) {
+		printf("Remove RPC config file: %s\n", conf_file);
+		unlink(conf_file);
+	}
+	close(img_fd);
+	return exit_code;
+}
diff --git a/test/others/mem-snap/run-predump-2.sh b/test/others/mem-snap/run-predump-2.sh
index 46af8063b..5ef1422b4 100755
--- a/test/others/mem-snap/run-predump-2.sh
+++ b/test/others/mem-snap/run-predump-2.sh
@@ -28,7 +28,7 @@ function stop_test {
 	wtime=1
 	cd ../../zdtm/static/
 	make maps04.stop
-	cat maps04.out | fgrep PASS || fail "Test failed"
+	fgrep PASS maps04.out || fail "Test failed"
 	echo "OK"
 }
 
diff --git a/test/others/mem-snap/run-predump.sh b/test/others/mem-snap/run-predump.sh
index d06d2d8fc..06ba74737 100755
--- a/test/others/mem-snap/run-predump.sh
+++ b/test/others/mem-snap/run-predump.sh
@@ -72,6 +72,6 @@ ${CRIU} restore -D "${IMGDIR}/$NRSNAP/" -o restore.log -d -v4 || fail "Fail to r
 
 cd ../../zdtm/static/
 make mem-touch.stop
-cat mem-touch.out | fgrep PASS || fail "Test failed"
+fgrep PASS mem-touch.out || fail "Test failed"
 
 echo "Test PASSED"
diff --git a/test/others/mem-snap/run-snap-auto-dedup.sh b/test/others/mem-snap/run-snap-auto-dedup.sh
index f77aa1fcb..a3801f5b4 100755
--- a/test/others/mem-snap/run-snap-auto-dedup.sh
+++ b/test/others/mem-snap/run-snap-auto-dedup.sh
@@ -84,7 +84,7 @@ ${CRIU} restore -D "${IMGDIR}/$NRSNAP/" -o restore.log -d -v4 || fail "Fail to r
 
 cd ../../zdtm/static/
 make mem-touch.stop
-cat mem-touch.out | fgrep PASS || fail "Test failed"
+fgrep PASS mem-touch.out || fail "Test failed"
 
 if [[ $dedup_ok_2 -ne 0 || $dedup_ok_1 -ne 0 ]]; then
 	fail "Dedup test failed"
diff --git a/test/others/mem-snap/run-snap-dedup-on-restore.sh b/test/others/mem-snap/run-snap-dedup-on-restore.sh
index 6ae050bc7..5dbb5bf44 100755
--- a/test/others/mem-snap/run-snap-dedup-on-restore.sh
+++ b/test/others/mem-snap/run-snap-dedup-on-restore.sh
@@ -78,7 +78,7 @@ fi
 
 cd ../../zdtm/static/
 make mem-touch.stop
-cat mem-touch.out | fgrep PASS || fail "Test failed"
+fgrep PASS mem-touch.out || fail "Test failed"
 
 if [ $restore_dedup_ok -ne 0 ]; then
 	fail "Dedup test failed"
diff --git a/test/others/mem-snap/run-snap-dedup.sh b/test/others/mem-snap/run-snap-dedup.sh
index 27fcd55a9..40db95325 100755
--- a/test/others/mem-snap/run-snap-dedup.sh
+++ b/test/others/mem-snap/run-snap-dedup.sh
@@ -90,7 +90,7 @@ ${CRIU} restore -D "${IMGDIR}/$NRSNAP/" -o restore.log -d -v4 || fail "Fail to r
 
 cd ../../zdtm/static/
 make mem-touch.stop
-cat mem-touch.out | fgrep PASS || fail "Test failed"
+fgrep PASS mem-touch.out || fail "Test failed"
 
 if [[ $dedup_ok_2 -ne 0 || $dedup_ok_1 -ne 0 ]]; then
 	fail "Dedup test failed"
diff --git a/test/others/mem-snap/run-snap-maps04.sh b/test/others/mem-snap/run-snap-maps04.sh
index 2def909d9..267d51deb 100755
--- a/test/others/mem-snap/run-snap-maps04.sh
+++ b/test/others/mem-snap/run-snap-maps04.sh
@@ -58,7 +58,7 @@ ${CRIU} restore -D "${IMGDIR}/$NRSNAP/" -o restore.log --auto-dedup -d -v4 || fa
 make -C ../../zdtm/static/ maps04.stop
 sleep 1
 
-cat "../zdtm/static/maps04.out" | fgrep PASS || fail "Test failed"
+fgrep PASS "../zdtm/static/maps04.out" || fail "Test failed"
 
 size=$(du -sh -BK  dump/1/pages-*.img | grep -Eo '[0-9]+' | head -1)
 if [ $size -ne 0 ] ; then
diff --git a/test/others/mem-snap/run-snap.sh b/test/others/mem-snap/run-snap.sh
index b97bd295e..c91cd0098 100755
--- a/test/others/mem-snap/run-snap.sh
+++ b/test/others/mem-snap/run-snap.sh
@@ -69,6 +69,6 @@ ${CRIU} restore -D "${IMGDIR}/$NRSNAP/" -o restore.log -d -v4 || fail "Fail to r
 
 cd ../../zdtm/static/
 make mem-touch.stop
-cat mem-touch.out | fgrep PASS || fail "Test failed"
+fgrep PASS mem-touch.out || fail "Test failed"
 
 echo "Test PASSED"
diff --git a/test/others/mnt-ext-dev/run.sh b/test/others/mnt-ext-dev/run.sh
index 5a1f44450..5cdbc45a8 100755
--- a/test/others/mnt-ext-dev/run.sh
+++ b/test/others/mnt-ext-dev/run.sh
@@ -2,16 +2,14 @@
 set -e -x
 
 # construct root
-python ../../zdtm.py run -t zdtm/static/env00 --iter 0 -f ns
+python3 ../../zdtm.py run -t zdtm/static/env00 --iter 0 -f ns
 
 truncate -s 0 zdtm.loop
 truncate -s 50M zdtm.loop
 mkfs.ext4 -F zdtm.loop
 dev=`losetup --find --show zdtm.loop`
-mkdir -p ../../dev
-cp -ap $dev ../../dev
 export ZDTM_MNT_EXT_DEV=$dev
-python ../../zdtm.py run $EXTRA_OPTS -t zdtm/static/mnt_ext_dev || ret=$?
+python3 ../../zdtm.py run $EXTRA_OPTS -t zdtm/static/mnt_ext_dev || ret=$?
 losetup -d $dev
 unlink zdtm.loop
 exit $ret
diff --git a/test/others/mounts/mounts.sh b/test/others/mounts/mounts.sh
index 19116d0cf..bed156a50 100755
--- a/test/others/mounts/mounts.sh
+++ b/test/others/mounts/mounts.sh
@@ -12,7 +12,7 @@ cd $INMNTNS
 
 mount --make-rprivate /
 
-for i in `cat /proc/self/mounts | awk '{ print $2 }'`; do
+for i in `awk '{ print $2 }' < /proc/self/mounts`; do
 	[ '/' = "$i" ] && continue
 	[ '/proc' = "$i" ] && continue
 	[ '/dev' = "$i" ] && continue
@@ -20,7 +20,7 @@ for i in `cat /proc/self/mounts | awk '{ print $2 }'`; do
 	umount -l $i
 done
 
-python mounts.py
+python3 mounts.py
 kill $INMNTNS_PID
 while :; do
 	sleep 10
diff --git a/test/others/mounts/run.sh b/test/others/mounts/run.sh
index 35927fb5e..d665a726a 100755
--- a/test/others/mounts/run.sh
+++ b/test/others/mounts/run.sh
@@ -12,12 +12,12 @@ kill -0 $pid || exit
 cat /proc/$pid/mountinfo | sort -k 4
 echo "Suspend server"
 ${CRIU} dump -D dump -o dump.log -t $pid -v4 || {
-	cat dump/dump.log | grep Error
+	grep Error dump/dump.log
 	exit 1
 }
 echo "Resume server"
 ${CRIU} restore -d -D dump -o restore.log -v4 || {
-	cat dump/dump.log | grep Error
+	grep Error dump/restore.log
 	exit 1
 }
 cat /proc/$pid/mountinfo | sort -k 4
diff --git a/test/others/ns_ext/run.sh b/test/others/ns_ext/run.sh
index 2e9a6fe86..5d1e139d7 100755
--- a/test/others/ns_ext/run.sh
+++ b/test/others/ns_ext/run.sh
@@ -2,10 +2,13 @@
 
 set -x
 
+if ! ../../zdtm/static/macvlan.checkskip; then
+	echo "No macvlan support. Skipping"
+	exit 0
+fi
+
 if [[ "$1" == "pid" ]]; then
 	NS=pid
-	# CentOS 7 kernels do not have NSpid -> skip this test
-	grep NSpid /proc/self/status || exit 0
 else
 	NS=net
 fi
@@ -61,7 +64,7 @@ exec 33< $MNT1
 exec 34< $MNT2
 $CRIU dump -v4 -t $pid -o dump.log -D images --external $NS[$ino]:test_ns --external $NS[$ino2]:test_ns2
 RESULT=$?
-cat images/dump.log | grep -B 5 Error || echo ok
+grep -B 5 Error images/dump.log || echo ok
 [ "$RESULT" != "0" ] && {
 	echo "CRIU dump failed"
 	echo FAIL
@@ -70,7 +73,7 @@ cat images/dump.log | grep -B 5 Error || echo ok
 
 $CRIU restore -v4 -o restore.log -D images --inherit-fd fd[33]:test_ns --inherit-fd fd[34]:test_ns2 -d
 RESULT=$?
-cat images/restore.log | grep -B 5 Error || echo ok
+grep -B 5 Error images/restore.log || echo ok
 [ "$RESULT" != "0" ] && {
 	echo "CRIU restore failed"
 	echo FAIL
diff --git a/test/others/ns_ext/run_pidns.sh b/test/others/ns_ext/run_pidns.sh
index 7ac855a18..db12106e0 100755
--- a/test/others/ns_ext/run_pidns.sh
+++ b/test/others/ns_ext/run_pidns.sh
@@ -2,9 +2,6 @@
 
 set -e
 
-# CentOS 7 kernels do not have NSpid -> skip this test
-grep NSpid /proc/self/status || exit 0
-
 # This test creates a process in non-host pidns and then dumps it and restores
 # it into host pidns. We use pid >100000 in non-host pidns to make sure it does
 # not intersect with some host pid on restore but it is potentially racy so
@@ -36,7 +33,7 @@ mkdir -p images_pidns
 echo "$CRIU dump -v4 -o dump.log -t $PID -D images_pidns --external $PIDNS:exti"
 $CRIU dump -v4 -o dump.log -t $PID -D images_pidns --external $PIDNS:exti
 RESULT=$?
-cat images_pidns/dump.log | grep -B 5 Error || echo ok
+grep -B 5 Error images_pidns/dump.log || echo ok
 [ "$RESULT" != "0" ] && {
 	echo "CRIU dump failed"
 	echo FAIL
@@ -48,7 +45,7 @@ exec {pidns_fd}< /proc/self/ns/pid
 echo "$CRIU restore -v4 -o restore.log -D images_pidns --restore-detached --inherit-fd fd[$pidns_fd]:exti"
 $CRIU restore -v4 -o restore.log -D images_pidns --restore-detached --inherit-fd fd[$pidns_fd]:exti --pidfile test.pidfile
 RESULT=$?
-cat images_pidns/restore.log | grep -B 5 Error || echo ok
+grep -B 5 Error images_pidns/restore.log || echo ok
 [ "$RESULT" != "0" ] && {
 	echo "CRIU restore failed"
 	echo FAIL
diff --git a/test/others/pycriu/.gitignore b/test/others/pycriu/.gitignore
new file mode 100644
index 000000000..567609b12
--- /dev/null
+++ b/test/others/pycriu/.gitignore
@@ -0,0 +1 @@
+build/
diff --git a/test/others/pycriu/Makefile b/test/others/pycriu/Makefile
new file mode 100644
index 000000000..b6e3b4814
--- /dev/null
+++ b/test/others/pycriu/Makefile
@@ -0,0 +1,74 @@
+# Run the pycriu check tests against a private "criu service" instance.
+#
+# The recipes need pipefail and a file descriptor above 9, which make's
+# default /bin/sh (e.g. dash) may not support, so the shell is pinned to bash.
+SHELL := /bin/bash
+.SHELLFLAGS := -eu -o pipefail -c
+.ONESHELL:
+
+CRIU ?= ../../../criu/criu
+BUILD_DIR ?= build
+SOCKET_NAME ?= criu_service.socket
+PIDFILE_NAME ?= pidfile
+SERVICE_LOG ?= service.log
+PYTHON ?= python3
+
+PIDFILE := $(BUILD_DIR)/$(PIDFILE_NAME)
+CRIU_SOCKET := $(BUILD_DIR)/$(SOCKET_NAME)
+STATUS_FIFO := $(BUILD_DIR)/startup.status
+STATUS_FD := 200
+
+# Run all test scripts; the trap stops the service when the recipe shell exits.
+run: start
+	cleanup() { $(MAKE) --no-print-directory stop || true; }
+	trap cleanup EXIT INT TERM
+	"$(PYTHON)" test_check.py
+	"$(PYTHON)" test_check_fail.py
+	"$(PYTHON)" test_check_images_dir.py
+	"$(PYTHON)" test_check_work_dir_fd.py
+
+# Start the service unless the pidfile already names a live process.
+# A stale status FIFO is removed first so that mkfifo cannot fail on it.
+start:
+	mkdir -p "$(BUILD_DIR)"
+	if [ -s "$(PIDFILE)" ] && kill -0 "$$(cat "$(PIDFILE)")" 2>/dev/null; then
+		echo "Service running (PID $$(cat "$(PIDFILE)"))."
+		exit 0
+	fi
+	if ! command -v "$(CRIU)" >/dev/null 2>&1; then
+		echo "CRIU not found at $(CRIU)"
+		exit 1
+	fi
+	rm -f "$(STATUS_FIFO)"
+	mkfifo "$(STATUS_FIFO)"
+	exec $(STATUS_FD)<>"$(STATUS_FIFO)"
+	"$(CRIU)" service \
+		-v4 \
+		-W "$(BUILD_DIR)" \
+		--address "$(SOCKET_NAME)" \
+		-d \
+		--pidfile "$(PIDFILE_NAME)" \
+		-o "$(SERVICE_LOG)" \
+		--status-fd "$(STATUS_FD)"
+	"$(PYTHON)" read.py "$(STATUS_FIFO)"
+
+# Kill the service named by the pidfile and remove its runtime files.
+stop:
+	if [ ! -s "$(PIDFILE)" ]; then
+		echo "pidfile missing or empty"
+		exit 1
+	fi
+	pid=$$(cat "$(PIDFILE)")
+	if kill -0 "$$pid" 2>/dev/null; then
+		kill -9 "$$pid" || true
+	fi
+	rm -f "$(PIDFILE)" "$(CRIU_SOCKET)" "$(STATUS_FIFO)"
+
+# Kill a still-running service, then delete the whole build directory.
+clean:
+	if [ -s "$(PIDFILE)" ] && kill -0 "$$(cat "$(PIDFILE)")" 2>/dev/null; then
+		kill -9 "$$(cat "$(PIDFILE)")" || true
+	fi
+	rm -rf "$(BUILD_DIR)"
+
+.PHONY: start stop clean run
diff --git a/test/others/pycriu/read.py b/test/others/pycriu/read.py
new file mode 120000
index 000000000..c2c1e1365
--- /dev/null
+++ b/test/others/pycriu/read.py
@@ -0,0 +1 @@
+../rpc/read.py
\ No newline at end of file
diff --git a/test/others/pycriu/test_check.py b/test/others/pycriu/test_check.py
new file mode 100755
index 000000000..9888158db
--- /dev/null
+++ b/test/others/pycriu/test_check.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+"""Smoke test: a check() request without any options must succeed."""
+import os
+import sys
+
+# pycriu lives in ../../../lib relative to this script; put it on sys.path.
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+LIB_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "../../../lib"))
+if LIB_DIR not in sys.path:
+    sys.path.insert(0, LIB_DIR)
+
+import pycriu  # noqa: E402
+
+
+def main():
+    """Return 0 (and print PASS) if check() succeeds, 1 otherwise."""
+    client = pycriu.criu()
+    client.use_sk(os.path.join(SCRIPT_DIR, "build", "criu_service.socket"))
+
+    try:
+        client.check()
+    except Exception as err:
+        print(f"FAIL: {err}")
+        return 1
+    else:
+        print("PASS")
+        return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/test/others/pycriu/test_check_fail.py b/test/others/pycriu/test_check_fail.py
new file mode 100755
index 000000000..b5634c60b
--- /dev/null
+++ b/test/others/pycriu/test_check_fail.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+"""Negative test: check() must fail when only log_file is set."""
+import os
+import sys
+
+# Add ../../../lib so we can import pycriu
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+LIB_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "../../../lib"))
+if LIB_DIR not in sys.path:
+    sys.path.insert(0, LIB_DIR)
+
+import pycriu  # noqa: E402
+
+
+def main():
+    """Return 0 if check() is rejected as expected, 1 otherwise."""
+    socket_path = os.path.join(SCRIPT_DIR, "build", "criu_service.socket")
+
+    criu = pycriu.criu()
+    criu.use_sk(socket_path)
+
+    # Intentionally set only log_file (no images/work dir) to ensure check() fails
+    criu.opts.log_file = "check.log"
+
+    try:
+        criu.check()
+    except OSError as e:
+        # A socket-level error means the service was never reached, so the
+        # request was not actually rejected; do not count that as a pass.
+        # NOTE(review): assumes pycriu reports a rejected request with an
+        # exception that is not an OSError subclass -- confirm.
+        print(f"FAIL: could not talk to the CRIU service: {e}")
+        return 1
+    except Exception:
+        print("PASS")
+        return 0
+
+    print("FAIL: check() did not fail when log_file is set without images/work dir")
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/test/others/pycriu/test_check_images_dir.py b/test/others/pycriu/test_check_images_dir.py
new file mode 100755
index 000000000..f479c2a88
--- /dev/null
+++ b/test/others/pycriu/test_check_images_dir.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+"""Test that check() with images_dir set leaves a non-empty log file there."""
+import os
+import sys
+
+# pycriu lives in ../../../lib relative to this script; put it on sys.path.
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+LIB_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "../../../lib"))
+if LIB_DIR not in sys.path:
+    sys.path.insert(0, LIB_DIR)
+
+import pycriu  # noqa: E402
+
+
+def _log_path(images_dir, log_file):
+    """Resolve log_file against images_dir unless it is already absolute."""
+    if os.path.isabs(log_file):
+        return log_file
+    return os.path.join(images_dir, log_file)
+
+
+def main():
+    """Return 0 if check() succeeds and the log exists and is non-empty."""
+    build_dir = os.path.join(SCRIPT_DIR, "build")
+
+    client = pycriu.criu()
+    client.use_sk(os.path.join(build_dir, "criu_service.socket"))
+
+    client.opts.images_dir = build_dir
+    client.opts.log_file = "check.log"
+    client.opts.log_level = 4
+
+    try:
+        client.check()
+    except Exception as err:
+        log = _log_path(build_dir, client.opts.log_file)
+        hint = "see log: " + log if os.path.exists(log) else "no log found"
+        print(f"FAIL: {err} ({hint})")
+        return 1
+
+    log = _log_path(build_dir, client.opts.log_file)
+    if not os.path.isfile(log) or os.path.getsize(log) <= 0:
+        print(f"FAIL: log file missing or empty: {log}")
+        return 1
+
+    print("PASS")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/test/others/pycriu/test_check_work_dir_fd.py b/test/others/pycriu/test_check_work_dir_fd.py
new file mode 100755
index 000000000..e20a83097
--- /dev/null
+++ b/test/others/pycriu/test_check_work_dir_fd.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+"""Test that check() succeeds when the work directory is given as work_dir_fd."""
+import os
+import sys
+
+# pycriu lives in ../../../lib relative to this script; put it on sys.path.
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+LIB_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "../../../lib"))
+if LIB_DIR not in sys.path:
+    sys.path.insert(0, LIB_DIR)
+
+import pycriu  # noqa: E402
+
+
+def main():
+    """Return 0 (and print PASS) if check() succeeds, 1 otherwise."""
+    build_dir = os.path.join(SCRIPT_DIR, "build")
+    os.makedirs(build_dir, exist_ok=True)
+
+    # Prefer an O_PATH descriptor; fall back to O_RDONLY where O_PATH is missing.
+    flags = os.O_PATH if hasattr(os, "O_PATH") else os.O_RDONLY
+    dir_fd = os.open(build_dir, flags)
+
+    client = pycriu.criu()
+    client.use_sk(os.path.join(build_dir, "criu_service.socket"))
+
+    client.opts.work_dir_fd = dir_fd
+    client.opts.log_file = "check.log"
+    client.opts.log_level = 4
+
+    status = 0
+    try:
+        client.check()
+    except Exception as err:
+        print(f"FAIL: {err}")
+        status = 1
+    finally:
+        # Closing is best effort; a failure here must not change the result.
+        try:
+            os.close(dir_fd)
+        except Exception:
+            pass
+
+    if status == 0:
+        print("PASS")
+    return status
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/test/others/rpc/Makefile b/test/others/rpc/Makefile
index fc64f0c97..c0e56d528 100644
--- a/test/others/rpc/Makefile
+++ b/test/others/rpc/Makefile
@@ -4,13 +4,22 @@ all: test-c rpc_pb2.py criu
 CFLAGS += -g -Werror -Wall -I.
 LDLIBS +=  -lprotobuf-c
 
-PYTHON ?= python
+PYTHON ?= python3
 
 run: all
 	@make -C .. loop
-	mkdir -p build
+	mkdir -p build/imgs_errno build/imgs_ps build/imgs_c build/imgs_loop build/imgs_py
 	chmod a+rwx build
+	chmod a+rwx build/imgs_errno build/imgs_ps build/imgs_c build/imgs_loop build/imgs_py
 	rm -f build/status
+	rm -f build/_marker_*
+	@# Create all log files to be accessible for anybody
+	@# so that they can be displayed by any user.
+	for i in imgs_errno/criu.log imgs_ps/page-server.log imgs_ps/dump.log \
+		imgs_c/restore-c.log imgs_loop/criu.log imgs_loop/dump-loop.log \
+		imgs_py/criu.log imgs_py/restore-py.log imgs_c/criu.log service.log; do \
+		touch build/$$i; chmod 666 build/$$i; \
+	done
 	sudo -g '#1000' -u '#1000' mkfifo build/status
 	@# Need to start the criu daemon here to access the pidfile.
 	@# The script read.py is used to wait until 'criu service'
@@ -39,7 +48,7 @@ rpc_pb2.py: rpc.proto
 	protoc --proto_path=. --python_out=. rpc.proto
 
 rpc.pb-c.c: rpc.proto
-	protoc-c --proto_path=. --c_out=. rpc.proto
+	protoc --proto_path=. --c_out=. rpc.proto
 
 clean:
 	rm -rf build rpc.pb-c.o test-c.o test-c rpc.pb-c.c rpc.pb-c.h rpc_pb2.py rpc_pb2.pyc criu
diff --git a/test/others/rpc/action-script.sh b/test/others/rpc/action-script.sh
new file mode 100755
index 000000000..991e315de
--- /dev/null
+++ b/test/others/rpc/action-script.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+#
+# Action script for the RPC config-file test: it creates one marker file per
+# action name and fails when the same action is seen a second time.
+
+# The action name is taken from CRTOOLS_SCRIPT_ACTION and must not be empty.
+if [ -z "$CRTOOLS_SCRIPT_ACTION" ]; then
+	echo "Error: CRTOOLS_SCRIPT_ACTION is not set."
+	exit 2
+fi
+
+marker="_marker_${CRTOOLS_SCRIPT_ACTION}"
+
+# An existing marker means this action hook already ran once.
+if [ -f "$marker" ]; then
+	echo "Error: Running the same action hook for the second time"
+	exit 1
+fi
+
+touch "$marker"
+exit 0
diff --git a/test/others/rpc/config_file.py b/test/others/rpc/config_file.py
index 90c80fcae..c1a8276d8 100755
--- a/test/others/rpc/config_file.py
+++ b/test/others/rpc/config_file.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 
 import argparse
 import os
@@ -13,6 +13,9 @@ from setup_swrk import setup_swrk
 log_file = 'config_file_test.log'
 does_not_exist = 'does-not.exist'
 
+script_path = os.path.dirname(os.path.abspath(__file__))
+action_script_file = os.path.join(script_path, 'action-script.sh')
+
 
 def setup_config_file(content):
     # Creating a temporary file which will be used as configuration file.
@@ -89,29 +92,37 @@ def test_broken_configuration_file():
         sys.exit(-1)
 
 
-def search_in_log_file(log, message):
-    with open(os.path.join(args['dir'], log)) as f:
+def search_in_log_file(log_path, message):
+    with open(log_path) as f:
         if message not in f.read():
-            print(
-                'FAIL: Missing the expected error message (%s) in the log file'
-                % message)
+            print('FAIL: Missing the expected error message (%s) in the log file' % message)
             sys.exit(-1)
 
 
+def print_log_file(log_path):
+    print("\n--- Begin log file: %s ---" % log_path)
+    with open(log_path, 'r') as f:
+        print(f.read())
+    print("--- End log file ---\n")
+
+
 def check_results(resp, log):
     # Check if the specified log file exists
-    if not os.path.isfile(os.path.join(args['dir'], log)):
+    log_path = os.path.join(args['dir'], log)
+    if not os.path.isfile(log_path):
         print('FAIL: Expected log file %s does not exist' % log)
         sys.exit(-1)
     # Dump should have failed with: 'The criu itself is within dumped tree'
     if resp.type != rpc.DUMP:
         print('FAIL: Unexpected msg type %r' % resp.type)
+        print_log_file(log_path)
         sys.exit(-1)
     if 'The criu itself is within dumped tree' not in resp.cr_errmsg:
         print('FAIL: Missing the expected error message in RPC response')
+        print_log_file(log_path)
         sys.exit(-1)
     # Look into the log file for the same message
-    search_in_log_file(log, 'The criu itself is within dumped tree')
+    search_in_log_file(log_path, 'The criu itself is within dumped tree')
 
 
 def test_rpc_without_configuration_file():
@@ -156,6 +167,7 @@ def test_rpc_with_configuration_file_overwriting_rpc():
     # file settings in the default configuration.
     log = does_not_exist
     content = 'log-file ' + log + '\n'
+    content += 'action-script ' + action_script_file + '\n'
     content += 'no-tcp-established\nno-shell-job'
     path = setup_config_file(content)
     # Only set the configuration file via RPC;
@@ -180,11 +192,18 @@ args = vars(parser.parse_args())
 
 cleanup_output(args['dir'])
 
+print("*** Test broken config file ***")
 test_broken_configuration_file()
 cleanup_output(args['dir'])
+
+print("*** Test RPC without config file ***")
 test_rpc_without_configuration_file()
 cleanup_output(args['dir'])
+
+print("*** Test RPC with config file ***")
 test_rpc_with_configuration_file()
 cleanup_output(args['dir'])
+
+print("*** Test configuration file overwriting RPC ***")
 test_rpc_with_configuration_file_overwriting_rpc()
 cleanup_output(args['dir'])
diff --git a/test/others/rpc/errno.py b/test/others/rpc/errno.py
index f84757efd..ea841199f 100755
--- a/test/others/rpc/errno.py
+++ b/test/others/rpc/errno.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 # Test criu errno
 
 import socket, os, errno
@@ -40,7 +40,7 @@ class test:
         resp.ParseFromString(self.s.recv(self._MAX_MSG_SIZE))
         return resp
 
-    def check_resp(self, resp, typ, err):
+    def check_resp(self, resp, typ, err, errmsg = None):
         if resp.type != typ:
             raise Exception('Unexpected response type ' + str(resp.type))
 
@@ -50,6 +50,9 @@ class test:
         if err and resp.cr_errno != err:
             raise Exception('Unexpected cr_errno ' + str(resp.cr_errno))
 
+        if errmsg and errmsg not in str(resp.cr_errmsg):
+            raise Exception('Unexpected cr_msg \'' + str(resp.cr_errmsg) + '\'')
+
     def no_process(self):
         print('Try to dump unexisting process')
         # Get pid of non-existing process.
@@ -67,6 +70,7 @@ class test:
         req = self.get_base_req()
         req.type = rpc.DUMP
         req.opts.pid = pid
+        req.opts.network_lock = rpc.SKIP
 
         self.send_req(req)
         resp = self.recv_resp()
@@ -84,6 +88,7 @@ class test:
         req = self.get_base_req()
         req.type = rpc.DUMP
         req.opts.leave_running = True
+        req.opts.network_lock = rpc.SKIP
 
         self.send_req(req)
         resp = self.recv_resp()
@@ -130,11 +135,27 @@ class test:
 
         print('Success')
 
+    def child_first_err(self):
+        print('Receive correct first error message')
+
+        req = self.get_base_req()
+        req.type = rpc.CHECK
+        # Log file must not have subdirectory
+        req.opts.log_file = "/foo/bar.log"
+
+        self.send_req(req)
+        resp = self.recv_resp()
+
+        self.check_resp(resp, rpc.CHECK, None, "No subdirs are allowed in log_file name")
+
+        print('Success')
+
     def run(self):
         self.no_process()
         self.process_exists()
         self.bad_options()
         self.bad_request()
+        self.child_first_err()
 
 
 t = test()
diff --git a/test/others/rpc/ps_test.py b/test/others/rpc/ps_test.py
index b51357d42..259f22e77 100755
--- a/test/others/rpc/ps_test.py
+++ b/test/others/rpc/ps_test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 
 import socket, os, sys, errno
 import rpc_pb2 as rpc
@@ -23,6 +23,7 @@ req.type = rpc.PAGE_SERVER
 req.opts.log_file = 'page-server.log'
 req.opts.log_level = 4
 req.opts.images_dir_fd = os.open(args['dir'], os.O_DIRECTORY)
+req.opts.network_lock = rpc.SKIP
 
 s.send(req.SerializeToString())
 
diff --git a/test/others/rpc/read.py b/test/others/rpc/read.py
old mode 100644
new mode 100755
diff --git a/test/others/rpc/restore-loop.py b/test/others/rpc/restore-loop.py
index 84a2ce56d..67110c2cf 100755
--- a/test/others/rpc/restore-loop.py
+++ b/test/others/rpc/restore-loop.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 
 import socket, os, sys
 import rpc_pb2 as rpc
diff --git a/test/others/rpc/run.sh b/test/others/rpc/run.sh
index 9be577587..b6158dfea 100755
--- a/test/others/rpc/run.sh
+++ b/test/others/rpc/run.sh
@@ -1,16 +1,9 @@
 #!/bin/bash
 
-set -ex
-
-if [ -e /etc/os-release ]; then
-	. /etc/os-release
-	if [ "$ID" == "centos" ] && [[ "$VERSION_ID" == "7"* ]];then
-		echo "Skipping tests on CentOS 7 because they do not work in CI"
-		exit 0
-	fi
-fi
+set -e
 
 CRIU=./criu
+FAIL=1
 
 export PROTODIR=`readlink -f "${PWD}/../../protobuf"`
 
@@ -27,6 +20,13 @@ function stop_server {
 	title_print "Shutdown service server"
 	kill -SIGTERM $(cat build/pidfile)
 	unlink build/pidfile
+	if [ "${FAIL}" == "1" ]; then
+		for i in build/output*; do
+			echo "File: $i"
+			cat $i
+		done
+		find . -name "*.log" -print -exec cat {} \; || true
+	fi
 }
 
 function test_c {
@@ -59,7 +59,7 @@ function test_restore_loop {
 	title_print "Dump loop process"
 	# So theoretically '-j' (--shell-job) should not be necessary, but on alpine
 	# this test fails without it.
-	${CRIU} dump -j -v4 -o dump-loop.log -D build/imgs_loop -t ${P}
+	${CRIU} dump -j -v4 -o dump-loop.log --network-lock skip -D build/imgs_loop -t ${P}
 
 	title_print "Run restore-loop"
 	./restore-loop.py build/criu_service.socket build/imgs_loop
@@ -88,6 +88,8 @@ test_restore_loop
 test_ps
 test_errno
 
+FAIL=0
+
 stop_server
 
 trap 'echo "Success"' EXIT
diff --git a/test/others/rpc/setup_swrk.py b/test/others/rpc/setup_swrk.py
index c7f84f952..ffaa01de4 100644
--- a/test/others/rpc/setup_swrk.py
+++ b/test/others/rpc/setup_swrk.py
@@ -5,12 +5,6 @@ import subprocess
 def setup_swrk():
     print('Connecting to CRIU in swrk mode.')
     s1, s2 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET)
-
-    kwargs = {}
-    if sys.version_info.major == 3:
-        kwargs["pass_fds"] = [s1.fileno()]
-
-    swrk = subprocess.Popen(['./criu', "swrk", "%d" % s1.fileno()], **kwargs)
+    swrk = subprocess.Popen(['./criu', "swrk", "%d" % s1.fileno()], pass_fds=[s1.fileno()])
     s1.close()
     return swrk, s2
-
diff --git a/test/others/rpc/test-c.c b/test/others/rpc/test-c.c
index 792dbbf9c..b3507975f 100644
--- a/test/others/rpc/test-c.c
+++ b/test/others/rpc/test-c.c
@@ -99,6 +99,8 @@ int main(int argc, char *argv[])
 	req.opts->images_dir_fd = dir_fd;
 	req.opts->has_log_level = true;
 	req.opts->log_level = 4;
+	req.opts->has_network_lock = true;
+	req.opts->network_lock = CRIU_NETWORK_LOCK_METHOD__SKIP;
 
 	/*
 	 * Connect to service socket
diff --git a/test/others/rpc/test.py b/test/others/rpc/test.py
index 80f6338f4..6f692f755 100755
--- a/test/others/rpc/test.py
+++ b/test/others/rpc/test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 
 import socket, os, sys
 import rpc_pb2 as rpc
@@ -24,6 +24,7 @@ req.type = rpc.DUMP
 req.opts.leave_running = True
 req.opts.log_level = 4
 req.opts.images_dir_fd = os.open(args['dir'], os.O_DIRECTORY)
+req.opts.network_lock = rpc.SKIP
 
 # Send request
 s.send(req.SerializeToString())
diff --git a/test/others/rpc/version.py b/test/others/rpc/version.py
index 9d7fa745b..a18cd5b7b 100755
--- a/test/others/rpc/version.py
+++ b/test/others/rpc/version.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 
 import sys
 import rpc_pb2 as rpc
diff --git a/test/others/shell-job/run.py b/test/others/shell-job/run.py
index a59945d6a..969965f00 100755
--- a/test/others/shell-job/run.py
+++ b/test/others/shell-job/run.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 import os, pty, sys, subprocess
 import termios, fcntl, time
 
diff --git a/test/others/skip-file-rwx-check/Makefile b/test/others/skip-file-rwx-check/Makefile
new file mode 100644
index 000000000..419d592b7
--- /dev/null
+++ b/test/others/skip-file-rwx-check/Makefile
@@ -0,0 +1,11 @@
+# Entry points for the skip-file-rwx-check test.
+
+# Execute the test script.
+run:
+	./run.sh
+
+# Remove the test file and the *.img, log and stats files left by a run.
+clean:
+	rm -rf testfile *.img dump.log restore-expected-fail.log restore.log stats-dump stats-restore
+
+.PHONY: run clean
diff --git a/test/others/skip-file-rwx-check/run.sh b/test/others/skip-file-rwx-check/run.sh
new file mode 100755
index 000000000..0776ebf61
--- /dev/null
+++ b/test/others/skip-file-rwx-check/run.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+#
+# Check that restore rejects an open file whose permissions changed after the
+# dump, unless --skip-file-rwx-check is passed.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+set -o xtrace
+
+# NOTE(review): $criu is presumably defined by ../env.sh -- confirm.
+source ../env.sh
+
+make clean
+touch testfile
+chmod +w testfile
+# Background process that holds testfile open on fd 3 while it is dumped.
+bash -c 'exec 3<testfile; while :; do sleep 1; done' &
+testpid=$!
+if ! "$criu" dump --tree=$testpid --shell-job --verbosity=4 --log-file=dump.log
+then
+    kill $testpid
+    echo "Failed to dump process"
+    echo FAIL
+    exit 1
+fi
+# Drop the write permission so the file no longer looks as it did at dump time.
+chmod -w testfile
+if "$criu" restore --restore-detached --shell-job --verbosity=4 --log-file=restore-expected-fail.log
+then
+    kill $testpid
+    echo "Unexpectedly restored process with reference to a file whose r/w/x perms changed when --skip-file-rwx-check option was not used"
+    echo FAIL
+    exit 1
+fi
+if ! "$criu" restore --skip-file-rwx-check --restore-detached --shell-job --verbosity=4 --log-file=restore.log
+then
+    echo "Failed to restore process with reference to a file whose r/w/x perms changed when --skip-file-rwx-check option was used"
+    echo FAIL
+    exit 1
+fi
+kill $testpid
+echo PASS
diff --git a/test/others/unix-callback/Makefile b/test/others/unix-callback/Makefile
index 25bcf228b..984044077 100644
--- a/test/others/unix-callback/Makefile
+++ b/test/others/unix-callback/Makefile
@@ -4,7 +4,7 @@ run: all
 	./run.sh
 
 unix.pb-c.c: unix.proto
-	protoc-c --proto_path=. --c_out=. unix.proto
+	protoc --proto_path=. --c_out=. unix.proto
 
 unix-lib.so: unix-lib.c unix.pb-c.c
 	gcc -g -Werror -Wall -shared -nostartfiles unix-lib.c unix.pb-c.c -o unix-lib.so -iquote ../../../criu/include -fPIC
diff --git a/test/others/unix-callback/run.sh b/test/others/unix-callback/run.sh
index ec5b7f54e..b15daa289 100755
--- a/test/others/unix-callback/run.sh
+++ b/test/others/unix-callback/run.sh
@@ -40,7 +40,7 @@ done
 ${CRIU} restore -D data -o restore.log -v4 --lib `pwd`/lib -d || exit 1
 kill $pid
 while :; do
-	cat output | grep PASS && break
+	grep PASS output && break
 	sleep 1
 done
 
diff --git a/test/plugins/.gitignore b/test/plugins/.gitignore
new file mode 100644
index 000000000..140f8cf80
--- /dev/null
+++ b/test/plugins/.gitignore
@@ -0,0 +1 @@
+*.so
diff --git a/test/plugins/Makefile b/test/plugins/Makefile
new file mode 100644
index 000000000..4f620ad50
--- /dev/null
+++ b/test/plugins/Makefile
@@ -0,0 +1,35 @@
+# Build the shared objects used by the plugin tests: two inventory test
+# plugins compiled here and the amdgpu/cuda plugins copied from $(SRC_DIR).
+
+SRC_DIR := ../../plugins
+ARCH	:= x86
+
+INVENTORY_PLUGINS := inventory_test_enabled_plugin.so inventory_test_disabled_plugin.so
+PLUGIN_TARGETS := $(INVENTORY_PLUGINS) amdgpu_plugin.so cuda_plugin.so
+
+PLUGIN_INCLUDE	:= -iquote../../include \
+                   -iquote../../criu/include \
+                   -iquote../../criu/arch/$(ARCH)/include/ \
+                   -iquote../../
+PLUGIN_CFLAGS	:= -g -Wall -Werror -shared -nostartfiles -fPIC
+
+# Silent make rules.
+Q := @
+
+all: $(PLUGIN_TARGETS)
+
+# The GPU plugins are copied from their location under $(SRC_DIR).
+amdgpu_plugin.so: $(SRC_DIR)/amdgpu/amdgpu_plugin.so
+	$(Q) cp $< $@
+
+cuda_plugin.so: $(SRC_DIR)/cuda/cuda_plugin.so
+	$(Q) cp $< $@
+
+# Each inventory test plugin is compiled from the C file of the same name.
+$(INVENTORY_PLUGINS): %.so: %.c
+	$(Q) $(CC) $(PLUGIN_CFLAGS) $< -o $@ $(PLUGIN_INCLUDE)
+
+clean:
+	$(Q) $(RM) $(PLUGIN_TARGETS)
+
+.PHONY: all clean
diff --git a/test/plugins/inventory_test_disabled_plugin.c b/test/plugins/inventory_test_disabled_plugin.c
new file mode 100644
index 000000000..468fe924b
--- /dev/null
+++ b/test/plugins/inventory_test_disabled_plugin.c
@@ -0,0 +1,25 @@
+/*
+ * Test plugin whose init hook, at the restore stage, returns the result of
+ * check_and_remove_inventory_plugin() for its own name.
+ */
+#include <string.h>
+
+#include "criu-plugin.h"
+#include "image.h"
+
+/* Plugin init hook: only the restore stage does any work. */
+int inventory_test_disabled_plugin_init(int stage)
+{
+	if (stage == CR_PLUGIN_STAGE__RESTORE)
+		return check_and_remove_inventory_plugin(CR_PLUGIN_DESC.name, strlen(CR_PLUGIN_DESC.name));
+
+	return 0;
+}
+
+/* Plugin exit hook: nothing to clean up. */
+void inventory_test_disabled_plugin_fini(int stage, int ret)
+{
+	return;
+}
+
+CR_PLUGIN_REGISTER("inventory_test_disabled_plugin", inventory_test_disabled_plugin_init, inventory_test_disabled_plugin_fini)
diff --git a/test/plugins/inventory_test_enabled_plugin.c b/test/plugins/inventory_test_enabled_plugin.c
new file mode 100644
index 000000000..89e684e2a
--- /dev/null
+++ b/test/plugins/inventory_test_enabled_plugin.c
@@ -0,0 +1,26 @@
+/*
+ * Test plugin that passes its own name to add_inventory_plugin() outside the
+ * restore stage and, at restore, returns the negated result of
+ * check_and_remove_inventory_plugin() for that name.
+ */
+#include <string.h>
+
+#include "criu-plugin.h"
+#include "image.h"
+
+/* Plugin init hook: see the file comment for the per-stage behaviour. */
+int inventory_test_enabled_plugin_init(int stage)
+{
+	if (stage == CR_PLUGIN_STAGE__RESTORE)
+		return !check_and_remove_inventory_plugin(CR_PLUGIN_DESC.name, strlen(CR_PLUGIN_DESC.name));
+
+	return add_inventory_plugin(CR_PLUGIN_DESC.name);
+}
+
+/* Plugin exit hook: nothing to clean up. */
+void inventory_test_enabled_plugin_fini(int stage, int ret)
+{
+	return;
+}
+
+CR_PLUGIN_REGISTER("inventory_test_enabled_plugin", inventory_test_enabled_plugin_init, inventory_test_enabled_plugin_fini)
diff --git a/test/pycriu b/test/pycriu
index d13a8790a..d1b6ed5c4 120000
--- a/test/pycriu
+++ b/test/pycriu
@@ -1 +1 @@
-../lib/py/
\ No newline at end of file
+../lib/pycriu
\ No newline at end of file
diff --git a/test/zdtm.py b/test/zdtm.py
index c011c79c0..e21356c30 100755
--- a/test/zdtm.py
+++ b/test/zdtm.py
@@ -1,10 +1,4 @@
-#!/usr/bin/env python
-from __future__ import (
-    absolute_import,
-    division,
-    print_function,
-    unicode_literals
-)
+#!/usr/bin/env python3
 
 import argparse
 import atexit
@@ -28,19 +22,35 @@ import sys
 import tempfile
 import time
 import uuid
+import site
 from builtins import input, int, open, range, str, zip
 
 import yaml
 
-import pycriu as crpc
 from zdtm.criu_config import criu_config
 
 # File to store content of streamed images
 STREAMED_IMG_FILE_NAME = "img.criu"
 
+# A library used to preload C functions to simulate
+# cases such as partial read with pread().
+LIBFAULT_PATH = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)),
+    "libfault",
+    "libfault.so"
+)
+
+# A directory that contains the CRIU plugins.
+PLUGINS_DIR = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)),
+    "plugins"
+)
+
 prev_line = None
 uuid = uuid.uuid4()
 
+NON_ROOT_UID = 65534
+
 
 def alarm(*args):
     print("==== ALARM ====")
@@ -70,18 +80,32 @@ tests_root = None
 def clean_tests_root():
     global tests_root
     if tests_root and tests_root[0] == os.getpid():
+        subprocess.call(["./umount2", os.path.join(tests_root[1], "dev")])
+        os.rmdir(os.path.join(tests_root[1], "root/root"))
         os.rmdir(os.path.join(tests_root[1], "root"))
+        os.rmdir(os.path.join(tests_root[1], "dev"))
         os.rmdir(tests_root[1])
 
 
 def make_tests_root():
     global tests_root
     if not tests_root:
-        tests_root = (os.getpid(), tempfile.mkdtemp("", "criu-root-", "/tmp"))
+        tmpdir = os.environ.get("TMPDIR", "/tmp")
+        tests_root = (os.getpid(), tempfile.mkdtemp("", "criu-root-", tmpdir))
         atexit.register(clean_tests_root)
         os.mkdir(os.path.join(tests_root[1], "root"))
-    os.chmod(tests_root[1], 0o777)
-    return os.path.join(tests_root[1], "root")
+        os.mkdir(os.path.join(tests_root[1], "root", "root"))
+        # The current file system can be mounted with nodev, so let's create a
+        # new tmpfs mount for /dev.
+        devpath = os.path.join(tests_root[1], "dev")
+        os.mkdir(devpath)
+        # zdtm wants to create files on this mount. User namespace tests are
+        # running with custom user and group mappings.
+        subprocess.check_call(["mount", "-t", "tmpfs", "criu-test-dev", devpath])
+        os.chmod(devpath, 0o777)
+    os.chmod(tests_root[1], 0o755)
+    os.chmod(os.path.join(tests_root[1], "root"), 0o755)
+    return os.path.join(tests_root[1], "root", "root"), os.path.join(tests_root[1], "dev")
 
 
 # Report generation
@@ -177,15 +201,16 @@ class host_flavor:
 
 class ns_flavor:
     __root_dirs = [
-        "/bin", "/sbin", "/etc", "/lib", "/lib64", "/dev", "/dev/pts",
-        "/dev/net", "/tmp", "/usr", "/proc", "/run"
+        "/bin", "/sbin", "/etc", "/lib", "/lib64", "/dev",
+        "/tmp", "/usr", "/proc", "/run"
     ]
+    __dev_dirs = ["pts", "net"]
 
     def __init__(self, opts):
         self.name = "ns"
         self.ns = True
         self.uns = False
-        self.root = make_tests_root()
+        self.root, self.devpath = make_tests_root()
         self.root_mounted = False
 
     def __copy_one(self, fname):
@@ -231,16 +256,19 @@ class ns_flavor:
             self.__copy_one(lib)
 
     def __mknod(self, name, rdev=None):
-        name = "/dev/" + name
+        tdev = stat.S_IFCHR
         if not rdev:
-            if not os.access(name, os.F_OK):
+            if not os.access(os.path.join("/dev", name), os.F_OK):
                 print("Skipping %s at root" % name)
                 return
             else:
-                rdev = os.stat(name).st_rdev
+                s = os.stat(os.path.join("/dev", name))
+                rdev = s.st_rdev
+                if stat.S_ISBLK(s.st_mode):
+                    tdev = stat.S_IFBLK
 
-        name = self.root + name
-        os.mknod(name, stat.S_IFCHR, rdev)
+        name = os.path.join(self.devpath, name)
+        os.mknod(name, tdev, rdev)
         os.chmod(name, 0o666)
 
     def __construct_root(self):
@@ -251,11 +279,18 @@ class ns_flavor:
         for ldir in ["/bin", "/sbin", "/lib", "/lib64"]:
             os.symlink(".." + ldir, self.root + "/usr" + ldir)
 
+    def __construct_dev(self):
+        for dir in self.__dev_dirs:
+            os.mkdir(os.path.join(self.devpath, dir))
+            os.chmod(os.path.join(self.devpath, dir), 0o755)
         self.__mknod("tty", os.makedev(5, 0))
         self.__mknod("null", os.makedev(1, 3))
         self.__mknod("net/tun")
         self.__mknod("rtc")
         self.__mknod("autofs", os.makedev(10, 235))
+        ext_dev = os.getenv("ZDTM_MNT_EXT_DEV")
+        if ext_dev:
+            self.__mknod(os.path.basename(ext_dev))
 
     def __copy_deps(self, deps):
         for d in deps.split('|'):
@@ -267,7 +302,7 @@ class ns_flavor:
 
     def init(self, l_bins, x_bins):
         subprocess.check_call(
-            ["mount", "--make-slave", "--bind", ".", self.root])
+            ["mount", "--make-private", "--bind", ".", self.root])
         self.root_mounted = True
 
         if not os.access(self.root + "/.constructed", os.F_OK):
@@ -278,6 +313,9 @@ class ns_flavor:
                     self.__construct_root()
                     os.mknod(self.root + "/.constructed", stat.S_IFREG | 0o600)
 
+        if not os.access(self.devpath + "/.constructed", os.F_OK):
+            self.__construct_dev()
+            os.mknod(self.devpath + "/.constructed", stat.S_IFREG | 0o600)
         for b in l_bins:
             self.__copy_libs(b)
         for b in x_bins:
@@ -392,19 +430,21 @@ class test_fail_expected_exc(Exception):
 
 
 class zdtm_test:
-    def __init__(self, name, desc, flavor, freezer):
+    def __init__(self, name, desc, flavor, freezer, rootless):
         self.__name = name
         self.__desc = desc
         self.__freezer = None
+        self.__timeout = int(self.__desc.get('timeout') or 30)
+        self.__rootless = rootless
         self.__make_action('cleanout')
         self.__pid = 0
         self.__flavor = flavor
         self.__freezer = freezer
         self._bins = [name]
-        self._env = {}
+        self._env = {'TMPDIR': os.environ.get('TMPDIR', '/tmp')}
         self._deps = desc.get('deps', [])
+        self._bind = desc.get('bind')
         self.auto_reap = True
-        self.__timeout = int(self.__desc.get('timeout') or 30)
 
     def __make_action(self, act, env=None, root=None):
         sys.stdout.flush()  # Not to let make's messages appear before ours
@@ -426,7 +466,7 @@ class zdtm_test:
             preexec_fn=self.__freezer and self.__freezer.attach or None)
         if act == "pid":
             try_run_hook(self, ["--post-start"])
-        if s.wait():
+        if s.wait(timeout=self.__timeout):
             raise test_fail_exc(str(s_args))
 
         if self.__freezer:
@@ -439,6 +479,8 @@ class zdtm_test:
         wait_pid_die(int(self.__pid), self.__name, self.__timeout)
 
     def __add_wperms(self):
+        if os.getuid() != 0:
+            return
         # Add write perms for .out and .pid files
         for b in self._bins:
             p = os.path.dirname(b)
@@ -457,6 +499,9 @@ class zdtm_test:
             env['ZDTM_NOTIFY_FDIN'] = "100"
             env['ZDTM_NOTIFY_FDOUT'] = "101"
 
+        if self.__rootless:
+            env['ZDTM_ROOTLESS'] = "1"
+
         if not test_flag(self.__desc, 'suid'):
             # Numbers should match those in criu
             env['ZDTM_UID'] = "18943"
@@ -469,6 +514,9 @@ class zdtm_test:
         if self.__flavor.ns:
             env['ZDTM_NEWNS'] = "1"
             env['ZDTM_ROOT'] = self.__flavor.root
+            if self._bind:
+                env['ZDTM_BIND'] = self._bind
+            env['ZDTM_DEV'] = self.__flavor.devpath
             env['PATH'] = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
 
             if self.__flavor.uns:
@@ -504,8 +552,15 @@ class zdtm_test:
         self.__freezer.thaw()
         if self.__pid:
             print("Send the %d signal to  %s" % (sig, self.__pid))
-            os.kill(int(self.__pid), sig)
-            self.gone(sig == signal.SIGKILL)
+            try:
+                os.kill(int(self.__pid), sig)
+            except ProcessLookupError:
+                if sig != signal.SIGKILL:
+                    raise
+                print("The process %s doesn't exist" % self.__pid)
+                self.gone(True)
+            else:
+                self.gone(sig == signal.SIGKILL)
 
         self.__flavor.fini()
 
@@ -569,12 +624,18 @@ class zdtm_test:
         return opts
 
     def getdopts(self):
-        return self.__getcropts() + self.__freezer.getdopts(
-        ) + self.__desc.get('dopts', '').split()
+        opts = self.__getcropts() + self.__freezer.getdopts() + \
+            self.__desc.get('dopts', '').split()
+        if self.__flavor.ns:
+            opts += ["--external", "mnt[/dev]:ZDTM_DEV"]
+        return opts
 
     def getropts(self):
-        return self.__getcropts() + self.__freezer.getropts(
-        ) + self.__desc.get('ropts', '').split()
+        opts = self.__getcropts() + self.__freezer.getropts() + \
+            self.__desc.get('ropts', '').split()
+        if self.__flavor.ns:
+            opts += ["--external", "mnt[ZDTM_DEV]:%s" % self.__flavor.devpath]
+        return opts
 
     def unlink_pidfile(self):
         self.__pid = 0
@@ -618,29 +679,41 @@ class zdtm_test:
                 ["make", "zdtm_ct"], env=dict(os.environ, MAKEFLAGS=""))
         if not os.access("zdtm/lib/libzdtmtst.a", os.F_OK):
             subprocess.check_call(["make", "-C", "zdtm/"])
+        if 'preload_libfault' in opts and opts['preload_libfault']:
+            subprocess.check_call(["make", "-C", "libfault/"])
+
+        subprocess.check_call(["make", '--no-print-directory', "-C", "plugins/", "clean"])
+        if 'criu_plugin' in opts and opts['criu_plugin']:
+            for name in opts['criu_plugin']:
+                subprocess.check_call(["make", '--no-print-directory', "-C", "plugins/", f"{name}_plugin.so"])
+
+        if 'mocked_cuda_checkpoint' in opts and opts['mocked_cuda_checkpoint']:
+            subprocess.check_call(["make", "-C", "cuda-checkpoint/"])
+        if 'rootless' in opts and opts['rootless']:
+            return
         subprocess.check_call(
             ["flock", "zdtm_mount_cgroups.lock", "./zdtm_mount_cgroups", str(uuid)])
 
     @staticmethod
     def cleanup():
+        if 'rootless' in opts and opts['rootless']:
+            return
         subprocess.check_call(
             ["flock", "zdtm_mount_cgroups.lock", "./zdtm_umount_cgroups", str(uuid)])
 
 
 def load_module_from_file(name, path):
-    if sys.version_info[0] == 3 and sys.version_info[1] >= 5:
-        import importlib.util
-        spec = importlib.util.spec_from_file_location(name, path)
-        mod = importlib.util.module_from_spec(spec)
-        spec.loader.exec_module(mod)
-    else:
-        import imp
-        mod = imp.load_source(name, path)
+    import importlib.util
+    spec = importlib.util.spec_from_file_location(name, path)
+    mod = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(mod)
     return mod
 
 
 class inhfd_test:
-    def __init__(self, name, desc, flavor, freezer):
+    def __init__(self, name, desc, flavor, freezer, rootless):
+        if rootless:
+            raise test_fail_exc("This kind of test does not currently support rootless mode")
         self.__name = os.path.basename(name)
         print("Load %s" % name)
         self.__fdtyp = load_module_from_file(self.__name, name)
@@ -801,8 +874,8 @@ class inhfd_test:
 
 
 class groups_test(zdtm_test):
-    def __init__(self, name, desc, flavor, freezer):
-        zdtm_test.__init__(self, 'zdtm/lib/groups', desc, flavor, freezer)
+    def __init__(self, name, desc, flavor, freezer, rootless):
+        zdtm_test.__init__(self, 'zdtm/lib/groups', desc, flavor, freezer, rootless)
         if flavor.ns:
             self.__real_name = name
             with open(name) as fd:
@@ -814,7 +887,7 @@ class groups_test(zdtm_test):
 
         self._bins += self.__subs
         self._deps += get_test_desc('zdtm/lib/groups')['deps']
-        self._env = {'ZDTM_TESTS': self.__real_name}
+        self._env['ZDTM_TESTS'] = self.__real_name
 
     def __get_start_cmd(self, name):
         tdir = os.path.dirname(name)
@@ -824,7 +897,7 @@ class groups_test(zdtm_test):
         subprocess.check_call(s_args + [tname + '.cleanout'])
         s = subprocess.Popen(s_args + ['--dry-run', tname + '.pid'],
                              stdout=subprocess.PIPE)
-        out, _ = s.communicate()
+        out, _ = s.communicate(timeout=self._zdtm_test__timeout)  # __timeout is private to zdtm_test; self.__timeout would mangle to _groups_test__timeout here
         cmd = out.decode().splitlines()[-1].strip()
 
         return 'cd /' + tdir + ' && ' + cmd
@@ -868,15 +941,22 @@ class criu_cli:
             fault=None,
             strace=[],
             preexec=None,
-            nowait=False):
+            preload_libfault=False,
+            nowait=False,
+            timeout=60):
         env = dict(
             os.environ,
-            ASAN_OPTIONS="log_path=asan.log:disable_coredump=0:detect_leaks=0")
+            ASAN_OPTIONS="log_path=asan.log:disable_coredump=0:detect_leaks=0",
+            CRIU_LIBS_DIR=PLUGINS_DIR
+        )
 
         if fault:
             print("Forcing %s fault" % fault)
             env['CRIU_FAULT'] = fault
 
+        if preload_libfault:
+            env['LD_PRELOAD'] = LIBFAULT_PATH
+
         cr = subprocess.Popen(strace +
                               [criu_bin, action, "--no-default-config"] + args,
                               env=env,
@@ -884,7 +964,11 @@ class criu_cli:
                               preexec_fn=preexec)
         if nowait:
             return cr
-        return cr.wait()
+        return cr.wait(timeout=timeout)
+
+    @staticmethod
+    def exit_signal(ret):
+        return ret < 0
 
 
 class criu_rpc_process:
@@ -967,7 +1051,9 @@ class criu_rpc:
             fault=None,
             strace=[],
             preexec=None,
-            nowait=False):
+            preload_libfault=False,
+            nowait=False,
+            timeout=None):
         if fault:
             raise test_fail_exc('RPC and FAULT not supported')
         if strace:
@@ -1004,8 +1090,11 @@ class criu_rpc:
             else:
                 raise test_fail_exc('RPC for %s required' % action)
         except crpc.CRIUExceptionExternal as e:
-            print("Fail", e)
-            ret = -1
+            if e.typ != e.resp_typ:
+                ret = -2
+            else:
+                print("Fail", e)
+                ret = -1
         else:
             ret = 0
 
@@ -1018,6 +1107,10 @@ class criu_rpc:
 
         return ret
 
+    @staticmethod
+    def exit_signal(ret):
+        return ret == -2
+
 
 class criu:
     def __init__(self, opts):
@@ -1039,6 +1132,7 @@ class criu:
         self.__dedup = bool(opts['dedup'])
         self.__mdedup = bool(opts['noauto_dedup'])
         self.__user = bool(opts['user'])
+        self.__rootless = bool(opts['rootless'])
         self.__leave_stopped = bool(opts['stop'])
         self.__stream = bool(opts['stream'])
         self.__show_stats = bool(opts['show_stats'])
@@ -1048,9 +1142,29 @@ class criu:
         self.__img_streamer_process = None
         self.__tls = self.__tls_options() if opts['tls'] else []
         self.__criu_bin = opts['criu_bin']
+
+        global crpc
+        pycriu_search_path = opts.get('pycriu_search_path')
+        if pycriu_search_path:
+            sys.path.insert(0, pycriu_search_path)
+
+        try:
+            import pycriu as crpc
+            if pycriu_search_path:
+                print(f"pycriu loaded from: {crpc.__file__}")
+        except ImportError:
+            if not pycriu_search_path:
+                print("Consider building CRIU or using '--pycriu-search-path' option.")
+            raise
+        finally:
+            if pycriu_search_path:
+                sys.path.pop(0)
+
         self.__crit_bin = opts['crit_bin']
         self.__pre_dump_mode = opts['pre_dump_mode']
+        self.__preload_libfault = bool(opts['preload_libfault'])
         self.__mntns_compat_mode = bool(opts['mntns_compat_mode'])
+        self.__cuda_checkpoint = bool(opts['mocked_cuda_checkpoint'])
 
         if opts['rpc']:
             self.__criu = criu_rpc
@@ -1133,11 +1247,17 @@ class criu:
         s_args = ["--log-file", log, "--images-dir", self.__ddir(),
                   "--verbosity=4"] + opts
 
+        if self.__cuda_checkpoint:
+            s_args += ["--libdir", os.path.join(os.getcwd(), "..", "plugins", "cuda")]
+
         with open(os.path.join(self.__ddir(), action + '.cropt'), 'w') as f:
             f.write(' '.join(s_args) + '\n')
 
         print("Run criu " + action)
 
+        if self.__rootless:
+            s_args += ["--unprivileged"]
+
         strace = []
         if self.__sat:
             fname = os.path.join(self.__ddir(), action + '.strace')
@@ -1156,7 +1276,10 @@ class criu:
         if action == "restore":
             preexec = None
         else:
-            preexec = self.__user and self.set_user_id or None
+            if os.getuid():
+                preexec = None
+            else:
+                preexec = self.__user and self.set_user_id or None
 
         __ddir = self.__ddir()
 
@@ -1171,8 +1294,10 @@ class criu:
         with open("/proc/sys/kernel/ns_last_pid") as ns_last_pid_fd:
             ns_last_pid = ns_last_pid_fd.read()
 
+        preload_libfault = self.__preload_libfault and action in ['dump', 'pre-dump', 'restore']
+
         ret = self.__criu.run(action, s_args, self.__criu_bin, self.__fault,
-                              strace, preexec, nowait)
+                              strace, preexec, preload_libfault, nowait)
 
         if nowait:
             os.close(status_fds[1])
@@ -1212,8 +1337,8 @@ class criu:
                     return
             rst_succeeded = os.access(
                 os.path.join(__ddir, "restore-succeeded"), os.F_OK)
-            if self.__test.blocking() or (self.__sat and action == 'restore' and
-                                          rst_succeeded):
+            if (self.__test.blocking() and not self.__criu.exit_signal(ret)) or \
+               (self.__sat and action == 'restore' and rst_succeeded):
                 raise test_fail_expected_exc(action)
             else:
                 raise test_fail_exc("CRIU %s" % action)
@@ -1476,15 +1601,17 @@ class criu:
             except Exception:
                 return False
 
-        return criu_cli.run(
-            "check",
-            ["--no-default-config", "--verbosity=0", "--feature", feature],
-            opts['criu_bin']) == 0
+        args = ["--no-default-config", "--verbosity=0", "--feature", feature]
+        if opts['rootless']:
+            args += ["--unprivileged"]
+        # criu_cli.run() returns criu's exit status; 0 means the feature check passed.
+        return criu_cli.run("check", args, opts['criu_bin']) == 0
 
     @staticmethod
     def available():
         if not os.access(opts['criu_bin'], os.X_OK):
             print("CRIU binary not found at %s" % opts['criu_bin'])
+            print("Consider building CRIU or using '--criu-bin' option.")
             sys.exit(1)
 
     def kill(self):
@@ -1651,6 +1778,15 @@ def get_visible_state(test):
     return files, maps, mounts
 
 
+def has_vsyscall(maps):
+    """Return the first entry of maps that mentions the vsyscall range, else None."""
+    vsyscall_range = u"ffffffffff600000-ffffffffff601000"
+    hits = [entry for entry in maps if vsyscall_range in entry]
+    if hits:
+        return hits[0]
+    return None
+
+
 def check_visible_state(test, state, opts):
     new = get_visible_state(test)
 
@@ -1666,9 +1802,9 @@ def check_visible_state(test, state, opts):
         new_maps = new[1][pid]
         if os.getenv("COMPAT_TEST"):
             # the vsyscall vma isn't unmapped from x32 processes
-            vsyscall = u"ffffffffff600000-ffffffffff601000 r-xp"
-            if vsyscall in new_maps and vsyscall not in old_maps:
-                new_maps.remove(vsyscall)
+            entry = has_vsyscall(new_maps)
+            if entry and has_vsyscall(old_maps) is None:
+                new_maps.remove(entry)
         if old_maps != new_maps:
             print("%s: Old maps lost: %s" % (pid, old_maps - new_maps))
             print("%s: New maps appeared: %s" % (pid, new_maps - old_maps))
@@ -1891,7 +2027,7 @@ def do_run_test(tname, tdesc, flavs, opts):
         if opts['dry_run']:
             continue
         flav = flavors[f](opts)
-        t = tclass(tname, tdesc, flav, fcg)
+        t = tclass(tname, tdesc, flav, fcg, opts['rootless'])
         cr_api = criu(opts)
 
         try:
@@ -1942,8 +2078,6 @@ class Launcher:
         self.__subs = {}
         self.__fail = False
         self.__file_report = None
-        self.__junit_file = None
-        self.__junit_test_cases = None
         self.__failed = []
         self.__nr_skip = 0
         if self.__max > 1 and self.__total > 1:
@@ -1955,22 +2089,14 @@ class Launcher:
 
         if opts['report'] and (opts['keep_going'] or self.__total == 1):
             global TestSuite, TestCase
-            from junit_xml import TestCase, TestSuite
             now = datetime.datetime.now()
             att = 0
             reportname = os.path.join(report_dir, "criu-testreport.tap")
-            junitreport = os.path.join(report_dir, "criu-testreport.xml")
-            while os.access(reportname, os.F_OK) or os.access(
-                    junitreport, os.F_OK):
+            while os.access(reportname, os.F_OK):
                 reportname = os.path.join(report_dir,
                                           "criu-testreport" + ".%d.tap" % att)
-                junitreport = os.path.join(report_dir,
-                                           "criu-testreport" + ".%d.xml" % att)
                 att += 1
 
-            self.__junit_file = open(junitreport, 'a')
-            self.__junit_test_cases = []
-
             self.__file_report = open(reportname, 'a')
             print(u"TAP version 13", file=self.__file_report)
             print(u"# Hardware architecture: " + arch, file=self.__file_report)
@@ -1979,12 +2105,20 @@ class Launcher:
                   file=self.__file_report)
             print(u"# ", file=self.__file_report)
             print(u"1.." + str(nr_tests), file=self.__file_report)
-        with open("/proc/sys/kernel/tainted") as taintfd:
-            self.__taint = taintfd.read()
+        self.__taint = self.__read_kernel_tainted()
         if int(self.__taint, 0) != 0:
-            print("The kernel is tainted: %r" % self.__taint)
-            if not opts["ignore_taint"] and os.getenv("ZDTM_IGNORE_TAINT") != '1':
-                raise Exception("The kernel is tainted: %r" % self.__taint)
+            self.__report_kernel_taint("The kernel is tainted: %r" % self.__taint)
+
+    @staticmethod
+    def __read_kernel_tainted():
+        with open("/proc/sys/kernel/tainted") as taintfd:
+            return taintfd.read().strip()
+
+    @staticmethod
+    def __report_kernel_taint(msg):
+        print(msg)
+        if not opts["ignore_taint"] and os.getenv("ZDTM_IGNORE_TAINT") != "1":
+            raise Exception(msg)
 
     def __show_progress(self, msg):
         perc = int(self.__nr * 16 / self.__total)
@@ -1997,10 +2131,6 @@ class Launcher:
         self.__runtest += 1
         self.__nr_skip += 1
 
-        if self.__junit_test_cases is not None:
-            tc = TestCase(name)
-            tc.add_skipped_info(reason)
-            self.__junit_test_cases.append(tc)
         if self.__file_report:
             testline = u"ok %d - %s # SKIP %s" % (self.__runtest, name, reason)
             print(testline, file=self.__file_report)
@@ -2010,11 +2140,12 @@ class Launcher:
         if len(self.__subs) >= self.__max:
             self.wait()
 
-        with open("/proc/sys/kernel/tainted") as taintfd:
-            taint = taintfd.read()
+        taint = self.__read_kernel_tainted()
         if self.__taint != taint:
-            raise Exception("The kernel is tainted: %r (%r)" %
-                            (taint, self.__taint))
+            prev_taint = self.__taint
+            self.__taint = taint
+            self.__report_kernel_taint(
+                "The kernel is tainted: %r (was %r)" % (taint, prev_taint))
 
         '''
         The option --link-remap allows criu to hardlink open files back to the
@@ -2042,7 +2173,9 @@ class Launcher:
               'sat', 'script', 'rpc', 'criu_config', 'lazy_pages', 'join_ns',
               'dedup', 'sbs', 'freezecg', 'user', 'dry_run', 'noauto_dedup',
               'remote_lazy_pages', 'show_stats', 'lazy_migrate', 'stream',
-              'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode', 'mntns_compat_mode')
+              'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode', 'mntns_compat_mode',
+              'rootless', 'preload_libfault', 'mocked_cuda_checkpoint',
+              'pycriu_search_path')
         arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd}))
 
         if self.__use_log:
@@ -2052,8 +2185,14 @@ class Launcher:
             logf = None
             log = None
 
+        if opts['rootless'] and os.getuid() == 0:
+            os.setgid(NON_ROOT_UID)
+            os.setuid(NON_ROOT_UID)
+        env = dict(os.environ, CR_CT_TEST_INFO=arg)
+        if opts['mocked_cuda_checkpoint']:
+            env['PATH'] = os.path.join(os.getcwd(), "cuda-checkpoint") + ":" + env["PATH"]
         sub = subprocess.Popen(["./zdtm_ct", "zdtm.py"],
-                               env=dict(os.environ, CR_CT_TEST_INFO=arg),
+                               env=env,
                                stdout=log,
                                stderr=subprocess.STDOUT,
                                close_fds=True)
@@ -2093,11 +2232,6 @@ class Launcher:
             # The following wait() is not useful for our domain logic.
             # It's useful for taming warnings in subprocess.Popen.__del__()
             sub['sub'].wait()
-            tc = None
-            if self.__junit_test_cases is not None:
-                tc = TestCase(sub['name'],
-                              elapsed_sec=time.time() - sub['start'])
-                self.__junit_test_cases.append(tc)
             if status != 0:
                 self.__fail = True
                 failed_flavor = decode_flav(os.WEXITSTATUS(status))
@@ -2108,7 +2242,6 @@ class Launcher:
                     with open(sub['log']) as sublog:
                         output = sublog.read()
                     details = {'output': output}
-                    tc.add_error_info(output=output)
                     print(testline, file=self.__file_report)
                     print("%s" % yaml.safe_dump(details,
                                                 explicit_start=True,
@@ -2154,10 +2287,6 @@ class Launcher:
         if not opts['fault'] and check_core_files():
             self.__fail = True
         if self.__file_report:
-            ts = TestSuite(opts['title'], self.__junit_test_cases,
-                           os.getenv("NODE_NAME"))
-            self.__junit_file.write(TestSuite.to_xml_string([ts]))
-            self.__junit_file.close()
             self.__file_report.close()
 
         if opts['keep_going']:
@@ -2193,9 +2322,21 @@ def all_tests(opts):
                 continue
             files.append(fp)
     excl = list(map(lambda x: os.path.join(desc['dir'], x), desc['exclude']))
-    tlist = list(filter(
+    tlist = list(sorted(filter(
         lambda x: not x.endswith('.checkskip') and not x.endswith('.hook') and
-        x not in excl, map(lambda x: x.strip(), files)))
+        x not in excl, map(lambda x: x.strip(), files))))
+
+    if opts.get('test_shard_count'):
+        if opts.get('test_shard_index') is None:
+            raise KeyError('--test-shard-count > 0 must come with --test-shard-index')
+        slice_idx = opts['test_shard_index']
+        slices = opts['test_shard_count']
+        if not 0 <= slice_idx < slices:  # a negative index would silently slice from the end of the list
+            raise IndexError('--test-shard-index must be in range [0, --test-shard-count) ({} vs {})'.format(slice_idx, slices))
+        slist = tlist[slice_idx::slices]  # round-robin: every slices-th test of the sorted list, starting at slice_idx
+        print("We're shard #{} of {}. Running {} of {} tests.\n".format(slice_idx, slices, len(slist), len(tlist)))
+        tlist = slist
+
     return tlist
 
 
@@ -2306,11 +2447,6 @@ def run_tests(opts):
         return
 
     torun = list(torun)
-    if opts['keep_going'] and len(torun) < 2:
-        print(
-            "[WARNING] Option --keep-going is more useful when running multiple tests"
-        )
-        opts['keep_going'] = False
 
     if opts['exclude']:
         excl = re.compile(".*(" + "|".join(opts['exclude']) + ")")
@@ -2353,6 +2489,7 @@ def run_tests(opts):
                 "Specify --criu-image-streamer-dir or modify PATH to provide an alternate location")
                 .format(streamer_dir))
 
+    usernsIsSupported = criu.check("userns")
     launcher = Launcher(opts, len(torun))
     try:
         for t in torun:
@@ -2422,7 +2559,7 @@ def run_tests(opts):
                 run_flavs = set(test_flavs) & set(opts_flavs)
             else:
                 run_flavs = set([test_flavs.pop()])
-            if not criu.check("userns"):
+            if not usernsIsSupported:
                 run_flavs -= set(['uns'])
             if opts['user']:
                 # FIXME -- probably uns will make sense
@@ -2591,6 +2728,10 @@ def set_nr_hugepages(nr):
         with open("/proc/sys/vm/nr_hugepages", "w") as f:
             f.write("{}\n".format(nr))
         return orig_hugepages
+    except PermissionError as err:
+        # EACCES is expected when running as non-root, otherwise re-raise the exception.
+        if err.errno != errno.EACCES or os.getuid() == 0:
+            raise
     except OSError as err:
         if err.errno != errno.EOPNOTSUPP:
             raise
@@ -2664,6 +2805,10 @@ def get_cli_args():
     rp.add_argument("--freezecg", help="Use freeze cgroup (path:state)")
     rp.add_argument("--user", help="Run CRIU as regular user",
                     action='store_true')
+    rp.add_argument(
+        "--rootless",
+        help="Run CRIU rootless (uid!=0) (needs CAP_CHECKPOINT_RESTORE)",
+        action='store_true')
     rp.add_argument("--rpc",
                     help="Run CRIU via RPC rather than CLI",
                     action='store_true')
@@ -2711,6 +2856,9 @@ def get_cli_args():
     rp.add_argument("--criu-bin",
                     help="Path to criu binary",
                     default='../criu/criu')
+    rp.add_argument("--pycriu-search-path",
+                    help=f"Path to search for pycriu module first (e.g., {site.getsitepackages()[0]})",
+                    default=None)
     rp.add_argument("--crit-bin",
                     help="Path to crit binary",
                     default='../crit/crit')
@@ -2724,6 +2872,19 @@ def get_cli_args():
     rp.add_argument("--mntns-compat-mode",
                     help="Use old compat mounts restore engine",
                     action='store_true')
+    rp.add_argument("--test-shard-index", type=int, default=None,
+                    help="Select tests for a shard <index> (0-based)")
+    rp.add_argument("--test-shard-count", type=int, default=0,
+                    help="Specify how many shards are being run (0=sharding disabled; must be the same for all shards)")
+    rp.add_argument("--preload-libfault", action="store_true", help="Run criu with library preload to simulate special cases")
+    rp.add_argument("--criu-plugin",
+                    help="Run tests with CRIU plugin",
+                    choices=['amdgpu', 'cuda', 'inventory_test_enabled', 'inventory_test_disabled'],
+                    nargs='+',
+                    default=None)
+    rp.add_argument("--mocked-cuda-checkpoint",
+                    action="store_true",
+                    help="Run criu with the cuda plugin and the mocked cuda-checkpoint tool")
 
     lp = sp.add_parser("list", help="List tests")
     lp.set_defaults(action=list_tests)
@@ -2788,7 +2949,7 @@ if __name__ == '__main__':
     if opts['debug']:
         sys.settrace(traceit)
 
-    if opts['action'] == 'run':
+    if opts['action'] == run_tests:
         criu.available()
     for tst in test_classes.values():
         tst.available()
diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc
index d34523315..c95b4ef6a 100644
--- a/test/zdtm/Makefile.inc
+++ b/test/zdtm/Makefile.inc
@@ -23,12 +23,12 @@ ifeq ($(ARCH),arm)
         ARMV := $(shell echo $(SUBARCH) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7')
 
         ifeq ($(ARMV),6)
-                USERCFLAGS += -march=armv6
+                ARCHCFLAGS += -march=armv6
         else ifeq ($(ARMV),7)
-                USERCFLAGS += -march=armv7-a+fp
+                ARCHCFLAGS += -march=armv7-a+fp
         else ifeq ($(ARMV),8)
-                # To build aarch32 on armv8 Travis-CI (see criu Makefile)
-                USERCFLAGS += -march=armv7-a
+                # To build aarch32 on armv8 (see criu Makefile)
+                ARCHCFLAGS += -march=armv7-a
                 ARMV := 7
         endif
 endif
@@ -40,8 +40,8 @@ endif
 PKG_CONFIG ?= pkg-config
 CFLAGS	+= -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0
 CFLAGS	+= -Wdeclaration-after-statement -Wstrict-prototypes
-CFLAGS	+= $(USERCFLAGS)
-CFLAGS	+= -D_GNU_SOURCE
+CFLAGS	+= $(USERCFLAGS) $(ARCHCFLAGS)
+CFLAGS	+= -D_GNU_SOURCE -D_LARGEFILE64_SOURCE
 CPPFLAGS += -iquote $(LIBDIR)/arch/$(ARCH)/include
 
 ifeq ($(strip $(V)),)
@@ -66,6 +66,11 @@ endif
         export PKG_CONFIG_PATH
 endif
 
+ifeq ($(SHSTK_ENABLE),1)
+        CFLAGS  += -mshstk
+        LDFLAGS += -Wl,-z,shstk
+endif
+
 define pkg-libs
         $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" $(PKG_CONFIG) --libs $(1))
 endef
@@ -74,9 +79,17 @@ define pkg-cflags
         $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" $(PKG_CONFIG) --cflags $(1))
 endef
 
+ifeq ($(GCS_ENABLE),1)
+        CFLAGS  += -mbranch-protection=standard
+        LDFLAGS += -z experimental-gcs=check
+        TEST_ENV = GLIBC_TUNABLES=glibc.cpu.aarch64_gcs=1:glibc.cpu.aarch64_gcs_policy=2
+else
+        TEST_ENV =
+endif
+
 %.d: %.c
 	$(E) " DEP      " $@
-	$(Q)$(CC) $(CFLAGS) $(CPPFLAGS) -MM -MP -c $< -o $@
+	$(Q)$(CC) $(CFLAGS) $(CPPFLAGS) -MM -MP $< -o $@
 
 %.o: %.c | %.d
 	$(E) " CC       " $@
diff --git a/test/zdtm/criu_config.py b/test/zdtm/criu_config.py
index 487becfb4..9fd292747 100644
--- a/test/zdtm/criu_config.py
+++ b/test/zdtm/criu_config.py
@@ -11,6 +11,7 @@ class criu_config:
             fault=None,
             strace=[],
             preexec=None,
+            preload=False,
             nowait=False):
 
         config_path = tempfile.mktemp(".conf", "criu-%s-" % action)
@@ -40,3 +41,7 @@ class criu_config:
         if nowait:
             return cr
         return cr.wait()
+
+    @staticmethod
+    def exit_signal(ret):
+        return ret < 0
diff --git a/test/zdtm/lib/Makefile b/test/zdtm/lib/Makefile
index 3ec58dfaf..428d726d6 100644
--- a/test/zdtm/lib/Makefile
+++ b/test/zdtm/lib/Makefile
@@ -1,10 +1,10 @@
 LIBDIR	:= .
 
-CFLAGS	+= $(USERCFLAGS)
+CFLAGS	+= $(USERCFLAGS) $(ARCHCFLAGS)
 
 LIB	:= libzdtmtst.a
 
-LIBSRC	:= datagen.c msg.c parseargs.c test.c streamutil.c lock.c ns.c tcp.c unix.c fs.c sysctl.c mem.c
+LIBSRC	:= datagen.c msg.c parseargs.c test.c streamutil.c lock.c ns.c tcp.c unix.c fs.c sysctl.c mem.c file.c mountinfo.c
 
 PKG_CONFIG ?= pkg-config
 pkg-config-check = $(shell sh -c '$(PKG_CONFIG) $(1) && echo y')
@@ -34,4 +34,4 @@ clean: clean-more
 
 $(LIB):	$(LIBOBJ)
 	$(E) " AR       " $@
-	$(Q)ar rcs $@ $^
+	$(Q)$(AR) rcs $@ $^
diff --git a/test/zdtm/lib/arch/loongarch64/include/asm/atomic.h b/test/zdtm/lib/arch/loongarch64/include/asm/atomic.h
new file mode 100644
index 000000000..1803aaeb4
--- /dev/null
+++ b/test/zdtm/lib/arch/loongarch64/include/asm/atomic.h
@@ -0,0 +1,49 @@
+#ifndef __CR_ATOMIC_H__
+#define __CR_ATOMIC_H__
+
+typedef uint32_t atomic_t;
+
+#define atomic_get(v)	 (*(volatile int *)(v)) /* (v) parenthesized: the cast must apply to the whole argument */
+#define atomic_set(v, i) (*(v) = (i))
+
+static inline int __atomic_add(int i, atomic_t *v)
+{
+	int result;
+	asm volatile("amadd_db.w %1, %2, %0" : "+ZB"(*v), "=&r"(result) : "r"(i) : "memory");
+	return result + i;
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+	__atomic_add(i, v);
+}
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	return __atomic_add(i, v);
+}
+
+#define atomic_sub(i, v)	atomic_add(-(int)(i), v) /* (i) parenthesized so "a - b" is negated as a whole */
+#define atomic_sub_return(i, v) atomic_add_return(-(int)(i), v)
+#define atomic_inc(v)		atomic_add_return(1, v)
+#define atomic_dec(v)		atomic_sub_return(1, v)
+#define atomic_dec_return(v)	atomic_sub_return(1, v)
+
+static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
+{
+	int ret;
+	asm volatile("1:                     \n"
+		     " ll.w  %0, %1          \n"
+		     " bne   %0, %2, 2f      \n"
+		     " or    $t0, %3, $zero  \n"
+		     " sc.w  $t0, %1         \n"
+		     " beqz  $t0, 1b         \n"
+		     "2:                     \n"
+		     " dbar  0               \n"
+		     : "=&r"(ret), "+ZB"(*ptr)
+		     : "r"(old), "r"(new)
+		     : "t0", "memory");
+	return ret;
+}
+
+#endif /* __CR_ATOMIC_H__ */
diff --git a/test/zdtm/lib/arch/riscv64/include/asm/atomic.h b/test/zdtm/lib/arch/riscv64/include/asm/atomic.h
new file mode 100644
index 000000000..a4faf1322
--- /dev/null
+++ b/test/zdtm/lib/arch/riscv64/include/asm/atomic.h
@@ -0,0 +1,107 @@
+#ifndef __CR_ATOMIC_H__
+#define __CR_ATOMIC_H__
+
+typedef uint32_t atomic_t;
+
+/* Copied from the Linux header arch/riscv/include/asm/barrier.h */
+
+#define nop() __asm__ __volatile__("nop")
+
+#define RISCV_FENCE(p, s) __asm__ __volatile__("fence " #p "," #s : : : "memory")
+
+/* These barriers need to enforce ordering on both devices or memory. */
+#define mb()  RISCV_FENCE(iorw, iorw)
+#define rmb() RISCV_FENCE(ir, ir)
+#define wmb() RISCV_FENCE(ow, ow)
+
+/* These barriers do not need to enforce ordering on devices, just memory. */
+#define __smp_mb()  RISCV_FENCE(rw, rw)
+#define __smp_rmb() RISCV_FENCE(r, r)
+#define __smp_wmb() RISCV_FENCE(w, w)
+
+#define __smp_store_release(p, v)                   \
+	do {                                        \
+		compiletime_assert_atomic_type(*p); \
+		RISCV_FENCE(rw, w);                 \
+		WRITE_ONCE(*p, v);                  \
+	} while (0)
+
+#define __smp_load_acquire(p)                       \
+	({                                          \
+		typeof(*p) ___p1 = READ_ONCE(*p);   \
+		compiletime_assert_atomic_type(*p); \
+		RISCV_FENCE(r, rw);                 \
+		___p1;                              \
+	})
+
+/* Copied from the Linux kernel header arch/riscv/include/asm/atomic.h */
+
+static inline int atomic_read(const atomic_t *v)
+{
+	return (*(volatile int *)v);
+}
+
+static inline void atomic_set(atomic_t *v, int i)
+{
+	*v = i;
+}
+
+#define atomic_get atomic_read
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	int result;
+
+	asm volatile("amoadd.w.aqrl %1, %2, %0" : "+A"(*v), "=r"(result) : "r"(i) : "memory");
+	__smp_mb();
+	return result + i;
+}
+
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+	return atomic_add_return(-i, v);
+}
+
+static inline int atomic_inc(atomic_t *v)
+{
+	return atomic_add_return(1, v) - 1;
+}
+
+static inline int atomic_add(int val, atomic_t *v)
+{
+	return atomic_add_return(val, v) - val;
+}
+
+static inline int atomic_dec(atomic_t *v)
+{
+	return atomic_sub_return(1, v) + 1;
+}
+
+/* true if the result is 0, or false for all other cases. */
+#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
+#define atomic_dec_return(v)   (atomic_sub_return(1, v))
+
+#define atomic_inc_return(v) (atomic_add_return(1, v))
+
+static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
+{
+	unsigned long tmp;
+	int oldval;
+
+	__smp_mb();
+
+	asm volatile("1:\n"
+		     "  lr.w %1, %2\n"
+		     "  bne %1, %3, 2f\n"
+		     "  sc.w %0, %4, %2\n"
+		     "  bnez %0, 1b\n"
+		     "2:"
+		     : "=&r"(tmp), "=&r"(oldval), "+A"(*ptr)
+		     : "r"(old), "r"(new)
+		     : "memory");
+
+	__smp_mb();
+	return oldval;
+}
+
+#endif /* __CR_ATOMIC_H__ */
diff --git a/test/zdtm/lib/file.c b/test/zdtm/lib/file.c
new file mode 100644
index 000000000..57d85421d
--- /dev/null
+++ b/test/zdtm/lib/file.c
@@ -0,0 +1,46 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include "zdtmtst.h"
+
+int write_value(const char *path, const char *value)
+{
+	int fd, l;
+
+	fd = open(path, O_WRONLY);
+	if (fd < 0) {
+		pr_perror("open %s", path);
+		return -1;
+	}
+
+	l = write(fd, value, strlen(value));
+	if (l < 0) {
+		pr_perror("failed to write %s to %s", value, path);
+		close(fd);
+		return -1;
+	}
+
+	close(fd);
+	return 0;
+}
+
+int read_value(const char *path, char *value, int size)
+{
+	int fd, ret;
+	/* At most size - 1 bytes are read so the terminating NUL always fits in value. */
+	if (size <= 0)
+		return -1;
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		pr_perror("open %s", path);
+		return -1;
+	}
+	ret = read(fd, value, size - 1);
+	if (ret < 0) {
+		pr_perror("read %s", path);
+		close(fd);
+		return -1;
+	}
+	value[ret] = '\0';
+	close(fd);
+	return 0;
+}
diff --git a/test/zdtm/lib/fs.c b/test/zdtm/lib/fs.c
index 7b8be5f9f..efcc7a1d0 100644
--- a/test/zdtm/lib/fs.c
+++ b/test/zdtm/lib/fs.c
@@ -54,7 +54,7 @@ mnt_info_t *get_cwd_mnt_info(void)
 
 	while (fgets(str, sizeof(str), f)) {
 		char *hyphen = strchr(str, '-');
-		ret = sscanf(str, "%i %i %u:%u %s %s", &mnt_id, &parent_mnt_id, &kmaj, &kmin, root, mountpoint);
+		ret = sscanf(str, "%i %i %u:%u %4095s %4095s", &mnt_id, &parent_mnt_id, &kmaj, &kmin, root, mountpoint);
 		if (ret != 6 || !hyphen)
 			goto err;
 		ret = sscanf(hyphen + 1, " %ms", &fsname);
@@ -108,6 +108,7 @@ int get_cwd_check_perm(char **result)
 		       "Bit 'x' should be set in all path components of "
 		       "this directory\n",
 		       cwd, getuid(), getgid(), errno, strerror(errno));
+		free(cwd);
 		return -1;
 	}
 
diff --git a/test/zdtm/lib/list.h b/test/zdtm/lib/list.h
new file mode 100644
index 000000000..97d0f1e06
--- /dev/null
+++ b/test/zdtm/lib/list.h
@@ -0,0 +1,389 @@
+#ifndef __ZDTM_LIST_H__
+#define __ZDTM_LIST_H__
+
+/*
+ * Double linked lists.
+ */
+
+#include <stddef.h>
+#include "zdtmtst.h"
+
+#define POISON_POINTER_DELTA 0
+#define LIST_POISON1	     ((void *)0x00100100 + POISON_POINTER_DELTA)
+#define LIST_POISON2	     ((void *)0x00200200 + POISON_POINTER_DELTA)
+
+struct list_head {
+	struct list_head *prev, *next;
+};
+
+#define LIST_HEAD_INIT(name)     \
+	{                        \
+		&(name), &(name) \
+	}
+#define LIST_HEAD(name) struct list_head name = LIST_HEAD_INIT(name)
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+	list->next = list;
+	list->prev = list;
+}
+
+static inline void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next)
+{
+	next->prev = new;
+	new->next = next;
+	new->prev = prev;
+	prev->next = new;
+}
+
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head, head->next);
+}
+
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head->prev, head);
+}
+
+static inline void __list_del(struct list_head *prev, struct list_head *next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+static inline void __list_del_entry(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+}
+
+static inline void list_del(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+	entry->next = LIST_POISON1;
+	entry->prev = LIST_POISON2;
+}
+
+static inline void list_replace(struct list_head *old, struct list_head *new)
+{
+	new->next = old->next;
+	new->next->prev = new;
+	new->prev = old->prev;
+	new->prev->next = new;
+}
+
+static inline void list_replace_init(struct list_head *old, struct list_head *new)
+{
+	list_replace(old, new);
+	INIT_LIST_HEAD(old);
+}
+
+static inline void list_del_init(struct list_head *entry)
+{
+	__list_del_entry(entry);
+	INIT_LIST_HEAD(entry);
+}
+
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+	__list_del_entry(list);
+	list_add(list, head);
+}
+
+static inline void list_move_tail(struct list_head *list, struct list_head *head)
+{
+	__list_del_entry(list);
+	list_add_tail(list, head);
+}
+
+static inline int list_is_last(const struct list_head *list, const struct list_head *head)
+{
+	return list->next == head;
+}
+
+static inline int list_is_first(const struct list_head *list, const struct list_head *head)
+{
+	return list->prev == head;
+}
+
+static inline int list_empty(const struct list_head *head)
+{
+	return head->next == head;
+}
+
+static inline int list_empty_careful(const struct list_head *head)
+{
+	struct list_head *next = head->next;
+	return (next == head) && (next == head->prev);
+}
+static inline void list_rotate_left(struct list_head *head)
+{
+	struct list_head *first;
+
+	if (!list_empty(head)) {
+		first = head->next;
+		list_move_tail(first, head);
+	}
+}
+
+static inline int list_is_singular(const struct list_head *head)
+{
+	return !list_empty(head) && (head->next == head->prev);
+}
+
+static inline void __list_cut_position(struct list_head *list, struct list_head *head, struct list_head *entry)
+{
+	struct list_head *new_first = entry->next;
+	list->next = head->next;
+	list->next->prev = list;
+	list->prev = entry;
+	entry->next = list;
+	head->next = new_first;
+	new_first->prev = head;
+}
+
+static inline void list_cut_position(struct list_head *list, struct list_head *head, struct list_head *entry)
+{
+	if (list_empty(head))
+		return;
+	if (list_is_singular(head) && (head->next != entry && head != entry))
+		return;
+	if (entry == head)
+		INIT_LIST_HEAD(list);
+	else
+		__list_cut_position(list, head, entry);
+}
+
+static inline void __list_splice(const struct list_head *list, struct list_head *prev, struct list_head *next)
+{
+	struct list_head *first = list->next;
+	struct list_head *last = list->prev;
+
+	first->prev = prev;
+	prev->next = first;
+
+	last->next = next;
+	next->prev = last;
+}
+
+static inline void list_splice(const struct list_head *list, struct list_head *head)
+{
+	if (!list_empty(list))
+		__list_splice(list, head, head->next);
+}
+
+static inline void list_splice_tail(struct list_head *list, struct list_head *head)
+{
+	if (!list_empty(list))
+		__list_splice(list, head->prev, head);
+}
+
+static inline void list_splice_init(struct list_head *list, struct list_head *head)
+{
+	if (!list_empty(list)) {
+		__list_splice(list, head, head->next);
+		INIT_LIST_HEAD(list);
+	}
+}
+
+static inline void list_splice_tail_init(struct list_head *list, struct list_head *head)
+{
+	if (!list_empty(list)) {
+		__list_splice(list, head->prev, head);
+		INIT_LIST_HEAD(list);
+	}
+}
+
+#define list_entry(ptr, type, member) container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member) list_entry((ptr)->next, type, member)
+
+#define list_for_each(pos, head) for (pos = (head)->next; pos != (head); pos = pos->next)
+
+#define list_for_each_prev(pos, head) for (pos = (head)->prev; pos != (head); pos = pos->prev)
+
+#define list_for_each_safe(pos, n, head) for (pos = (head)->next, n = pos->next; pos != (head); pos = n, n = pos->next)
+
+#define list_for_each_prev_safe(pos, n, head) \
+	for (pos = (head)->prev, n = pos->prev; pos != (head); pos = n, n = pos->prev)
+
+#define list_for_each_entry(pos, head, member)                                             \
+	for (pos = list_entry((head)->next, typeof(*pos), member); &pos->member != (head); \
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+#define list_for_each_entry_reverse(pos, head, member)                                     \
+	for (pos = list_entry((head)->prev, typeof(*pos), member); &pos->member != (head); \
+	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+#define list_prepare_entry(pos, head, member) ((pos) ?: list_entry(head, typeof(*pos), member))
+
+#define list_for_each_entry_continue(pos, head, member)                                        \
+	for (pos = list_entry(pos->member.next, typeof(*pos), member); &pos->member != (head); \
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+#define list_for_each_entry_continue_reverse(pos, head, member)                                \
+	for (pos = list_entry(pos->member.prev, typeof(*pos), member); &pos->member != (head); \
+	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+#define list_for_each_entry_from(pos, head, member) \
+	for (; &pos->member != (head); pos = list_entry(pos->member.next, typeof(*pos), member))
+
+#define list_for_each_entry_safe(pos, n, head, member)              \
+	for (pos = list_entry((head)->next, typeof(*pos), member),  \
+	    n = list_entry(pos->member.next, typeof(*pos), member); \
+	     &pos->member != (head); pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+#define list_for_each_entry_safe_continue(pos, n, head, member)        \
+	for (pos = list_entry(pos->member.next, typeof(*pos), member), \
+	    n = list_entry(pos->member.next, typeof(*pos), member);    \
+	     &pos->member != (head); pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+#define list_for_each_entry_safe_from(pos, n, head, member)                                  \
+	for (n = list_entry(pos->member.next, typeof(*pos), member); &pos->member != (head); \
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+#define list_for_each_entry_safe_reverse(pos, n, head, member)      \
+	for (pos = list_entry((head)->prev, typeof(*pos), member),  \
+	    n = list_entry(pos->member.prev, typeof(*pos), member); \
+	     &pos->member != (head); pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
+#define list_safe_reset_next(pos, n, member) n = list_entry(pos->member.next, typeof(*pos), member)
+
+/*
+ * Double linked lists with a single pointer list head.
+ */
+
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+struct hlist_node {
+	struct hlist_node *next, **pprev;
+};
+
+#define HLIST_HEAD_INIT       \
+	{                     \
+		.first = NULL \
+	}
+#define HLIST_HEAD(name)     struct hlist_head name = { .first = NULL }
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+
+static inline void INIT_HLIST_NODE(struct hlist_node *h)
+{
+	h->next = NULL;
+	h->pprev = NULL;
+}
+
+static inline int hlist_unhashed(const struct hlist_node *h)
+{
+	return !h->pprev;
+}
+
+static inline int hlist_empty(const struct hlist_head *h)
+{
+	return !h->first;
+}
+
+static inline void __hlist_del(struct hlist_node *n)
+{
+	struct hlist_node *next = n->next;
+	struct hlist_node **pprev = n->pprev;
+	*pprev = next;
+	if (next)
+		next->pprev = pprev;
+}
+
+static inline void hlist_del(struct hlist_node *n)
+{
+	__hlist_del(n);
+	n->next = LIST_POISON1;
+	n->pprev = LIST_POISON2;
+}
+
+static inline void hlist_del_init(struct hlist_node *n)
+{
+	if (!hlist_unhashed(n)) {
+		__hlist_del(n);
+		INIT_HLIST_NODE(n);
+	}
+}
+
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+	struct hlist_node *first = h->first;
+	n->next = first;
+	if (first)
+		first->pprev = &n->next;
+	h->first = n;
+	n->pprev = &h->first;
+}
+
+/* next must be != NULL */
+static inline void hlist_add_before(struct hlist_node *n, struct hlist_node *next)
+{
+	n->pprev = next->pprev;
+	n->next = next;
+	next->pprev = &n->next;
+	*(n->pprev) = n;
+}
+
+static inline void hlist_add_after(struct hlist_node *n, struct hlist_node *next)
+{
+	next->next = n->next;
+	n->next = next;
+	next->pprev = &n->next;
+
+	if (next->next)
+		next->next->pprev = &next->next;
+}
+
+/* after that we'll appear to be on some hlist and hlist_del will work */
+static inline void hlist_add_fake(struct hlist_node *n)
+{
+	n->pprev = &n->next;
+}
+
+/*
+ * Move a list from one list head to another. Fixup the pprev
+ * reference of the first entry if it exists.
+ */
+static inline void hlist_move_list(struct hlist_head *old, struct hlist_head *new)
+{
+	new->first = old->first;
+	if (new->first)
+		new->first->pprev = &new->first;
+	old->first = NULL;
+}
+
+#define hlist_entry(ptr, type, member) container_of(ptr, type, member)
+
+#define hlist_for_each(pos, head) for (pos = (head)->first; pos; pos = pos->next)
+
+#define hlist_for_each_safe(pos, n, head)                \
+	for (pos = (head)->first; pos && ({              \
+					  n = pos->next; \
+					  1;             \
+				  });                    \
+	     pos = n)
+
+#define hlist_entry_safe(ptr, type, member) ((ptr) ? hlist_entry(ptr, type, member) : NULL) /* NULL in, NULL out */
+
+#define hlist_for_each_entry(pos, head, member)                                  \
+	for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member); pos; \
+	     pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
+
+#define hlist_for_each_entry_continue(pos, member)                                    \
+	for (pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member); pos; \
+	     pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
+
+#define hlist_for_each_entry_from(pos, member) \
+	for (; pos; pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
+
+#define hlist_for_each_entry_safe(pos, n, head, member)                                                 \
+	for (pos = hlist_entry_safe((head)->first, typeof(*pos), member); pos && ({                     \
+										  n = pos->member.next; \
+										  1;                    \
+									  });                           \
+	     pos = hlist_entry_safe(n, typeof(*pos), member))
+
+#endif /* __ZDTM_LIST_H__ */
diff --git a/test/zdtm/lib/lock.h b/test/zdtm/lib/lock.h
index 2b23550be..cc5306e06 100644
--- a/test/zdtm/lib/lock.h
+++ b/test/zdtm/lib/lock.h
@@ -7,6 +7,7 @@
 #include <sys/time.h>
 #include <limits.h>
 #include <errno.h>
+#include <signal.h>
 #include "asm/atomic.h"
 
 #define BUG_ON(condition)                                                        \
diff --git a/test/zdtm/lib/mountinfo.c b/test/zdtm/lib/mountinfo.c
new file mode 100644
index 000000000..d6ab67a3f
--- /dev/null
+++ b/test/zdtm/lib/mountinfo.c
@@ -0,0 +1,490 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "mountinfo.h"
+#include "fs.h"
+#include "xmalloc.h"
+
+/*
+ * mountinfo contains mangled paths. space, tab and back slash were replaced
+ * with usual octal escape. This function replaces these symbols back.
+ */
+static void cure_path(char *path)
+{
+	int i, len, off = 0;
+
+	if (strchr(path, '\\') == NULL) /* fast path */
+		return;
+
+	len = strlen(path);
+	for (i = 0; i < len; i++) {
+		if (!strncmp(path + i, "\\040", 4)) {
+			path[i - off] = ' ';
+			goto replace;
+		} else if (!strncmp(path + i, "\\011", 4)) {
+			path[i - off] = '\t';
+			goto replace;
+		} else if (!strncmp(path + i, "\\134", 4)) {
+			path[i - off] = '\\';
+			goto replace;
+		}
+		if (off)
+			path[i - off] = path[i];
+		continue;
+	replace:
+		off += 3;
+		i += 3;
+	}
+	path[len - off] = 0;
+}
+
+static struct mountinfo_zdtm *mountinfo_zdtm_alloc(struct mntns_zdtm *mntns)
+{
+	struct mountinfo_zdtm *new;
+
+	new = xzalloc(sizeof(struct mountinfo_zdtm));
+	if (new)
+		list_add_tail(&new->list, &mntns->mountinfo_list);
+	return new;
+}
+
+static void mountinfo_zdtm_free(struct mountinfo_zdtm *mountinfo)
+{
+	list_del(&mountinfo->list);
+	xfree(mountinfo->mountpoint);
+	xfree(mountinfo->root);
+	xfree(mountinfo->fstype);
+	xfree(mountinfo);
+}
+
+static void mountinfo_zdtm_free_all(struct mntns_zdtm *mntns)
+{
+	struct mountinfo_zdtm *mountinfo, *tmp;
+
+	list_for_each_entry_safe(mountinfo, tmp, &mntns->mountinfo_list, list)
+		mountinfo_zdtm_free(mountinfo);
+}
+
+#define BUF_SIZE 4096
+static char buf[BUF_SIZE]; /* file-local line buffer: must not collide with a test's own "buf" symbol */
+
+/* Parse /proc/self/mountinfo into mntns->mountinfo_list; returns 0, or -1 with the list entries freed. */
+int mntns_parse_mountinfo(struct mntns_zdtm *mntns)
+{
+	FILE *f;
+	int ret;
+
+	INIT_LIST_HEAD(&mntns->mountinfo_list);
+	f = fopen("/proc/self/mountinfo", "r");
+	if (!f) {
+		pr_perror("Failed to open mountinfo");
+		return -1;
+	}
+
+	while (fgets(buf, BUF_SIZE, f)) {
+		struct mountinfo_zdtm *new;
+		unsigned int kmaj, kmin;
+		char *str, *hyphen, *shared, *master;
+		int n = -1; /* stays -1 if sscanf() stops before reaching %n */
+
+		new = mountinfo_zdtm_alloc(mntns);
+		if (!new) {
+			pr_perror("Failed to alloc mountinfo_zdtm");
+			goto free;
+		}
+
+		ret = sscanf(buf, "%i %i %u:%u %ms %ms %*s %n", &new->mnt_id, &new->parent_mnt_id, &kmaj, &kmin,
+			     &new->root, &new->mountpoint, &n);
+		if (ret != 6 || n < 0) {
+			pr_err("Failed to parse mountinfo line \"%s\"\n", buf);
+			goto free;
+		}
+		cure_path(new->root);
+		cure_path(new->mountpoint);
+		new->s_dev = MKKDEV(kmaj, kmin);
+
+		str = buf + n;
+		hyphen = strstr(buf, " - ");
+		if (!hyphen) {
+			pr_err("Failed to find \" - \" in mountinfo line \"%s\"\n", buf);
+			goto free;
+		}
+		*hyphen++ = '\0';
+
+		shared = strstr(str, "shared:");
+		if (shared)
+			new->shared_id = atoi(shared + 7);
+		master = strstr(str, "master:");
+		if (master)
+			new->master_id = atoi(master + 7);
+
+		ret = sscanf(hyphen, "- %ms", &new->fstype);
+		if (ret != 1) {
+			pr_err("Failed to parse fstype in mountinfo tail \"%s\"\n", hyphen);
+			goto free;
+		}
+	}
+
+	fclose(f);
+	return 0;
+free:
+	mountinfo_zdtm_free_all(mntns);
+	fclose(f);
+	return -1;
+}
+
+static struct mountinfo_topology *mountinfo_topology_alloc(struct mntns_zdtm *mntns, struct mountinfo_zdtm *mountinfo)
+{
+	struct mountinfo_topology *new;
+
+	new = xzalloc(sizeof(struct mountinfo_topology));
+	if (new) {
+		new->mountinfo = mountinfo;
+		new->topology_id = -1;
+		INIT_LIST_HEAD(&new->children);
+		INIT_LIST_HEAD(&new->siblings);
+		list_add_tail(&new->list, &mntns->topology_list);
+		INIT_LIST_HEAD(&new->sharing_list);
+	}
+	return new;
+}
+
+static void mountinfo_topology_free(struct mountinfo_topology *topology)
+{
+	list_del(&topology->list);
+	xfree(topology);
+}
+
+static void mountinfo_topology_free_all(struct mntns_zdtm *mntns)
+{
+	struct mountinfo_topology *topology, *tmp;
+
+	list_for_each_entry_safe(topology, tmp, &mntns->topology_list, list)
+		mountinfo_topology_free(topology);
+}
+
+static struct mountinfo_topology *mountinfo_topology_lookup_parent(struct mntns_zdtm *mntns,
+								   struct mountinfo_topology *topology)
+{
+	struct mountinfo_topology *parent;
+
+	list_for_each_entry(parent, &mntns->topology_list, list) {
+		if (parent->mountinfo->mnt_id == topology->mountinfo->parent_mnt_id)
+			return parent;
+	}
+
+	return NULL;
+}
+
+static struct mountinfo_topology *mt_subtree_next(struct mountinfo_topology *mt, struct mountinfo_topology *root)
+{
+	if (!list_empty(&mt->children))
+		return list_entry(mt->children.next, struct mountinfo_topology, siblings);
+
+	while (mt->parent && mt != root) {
+		if (mt->siblings.next == &mt->parent->children)
+			mt = mt->parent;
+		else
+			return list_entry(mt->siblings.next, struct mountinfo_topology, siblings);
+	}
+
+	return NULL;
+}
+
+static void __mt_resort_siblings(struct mountinfo_topology *parent)
+{
+	LIST_HEAD(list);
+
+	while (!list_empty(&parent->children)) {
+		struct mountinfo_topology *m, *p;
+
+		m = list_first_entry(&parent->children, struct mountinfo_topology, siblings);
+		list_del(&m->siblings);
+
+		list_for_each_entry(p, &list, siblings)
+			if (strcmp(p->mountinfo->mountpoint, m->mountinfo->mountpoint) < 0)
+				break;
+
+		list_add_tail(&m->siblings, &p->siblings);
+	}
+
+	list_splice(&list, &parent->children);
+}
+
+static void mntns_mt_resort_siblings(struct mntns_zdtm *mntns)
+{
+	struct mountinfo_topology *mt = mntns->tree;
+	LIST_HEAD(mtlist);
+	int i = 0;
+
+	while (1) {
+		/* Assign topology id to mt in dfs order */
+		mt->topology_id = i++;
+		list_move_tail(&mt->list, &mtlist);
+		__mt_resort_siblings(mt);
+		mt = mt_subtree_next(mt, mntns->tree);
+		if (!mt)
+			break;
+	}
+
+	/* Update mntns->topology_list in dfs order */
+	list_splice(&mtlist, &mntns->topology_list);
+}
+
+static struct sharing_group *sharing_group_find_or_alloc(struct mntns_zdtm *mntns, int shared_id, int master_id,
+							 unsigned int s_dev)
+{
+	struct sharing_group *sg;
+
+	list_for_each_entry(sg, &mntns->sharing_groups_list, list) {
+		if ((sg->shared_id == shared_id) && (sg->master_id == master_id)) {
+			if (sg->s_dev != s_dev) {
+				pr_err("Sharing/devid inconsistency\n");
+				return NULL;
+			}
+			return sg;
+		}
+	}
+
+	sg = xzalloc(sizeof(struct sharing_group));
+	if (!sg)
+		return NULL;
+
+	sg->shared_id = shared_id;
+	sg->master_id = master_id;
+	sg->s_dev = s_dev;
+	sg->topology_id = -1;
+
+	INIT_LIST_HEAD(&sg->children);
+	INIT_LIST_HEAD(&sg->siblings);
+	INIT_LIST_HEAD(&sg->mounts_list);
+
+	list_add_tail(&sg->list, &mntns->sharing_groups_list);
+
+	return sg;
+}
+
+static void sharing_group_free(struct sharing_group *sg)
+{
+	list_del(&sg->list);
+	xfree(sg);
+}
+
+static void sharing_group_free_all(struct mntns_zdtm *mntns)
+{
+	struct sharing_group *sg, *tmp;
+
+	list_for_each_entry_safe(sg, tmp, &mntns->sharing_groups_list, list)
+		sharing_group_free(sg);
+}
+
+static struct sharing_group *sharing_group_lookup_parent(struct mntns_zdtm *mntns, struct sharing_group *sg)
+{
+	struct sharing_group *parent;
+
+	list_for_each_entry(parent, &mntns->sharing_groups_list, list) {
+		if (parent->shared_id == sg->master_id)
+			return parent;
+	}
+
+	/* Create "external" sharing */
+	parent = sharing_group_find_or_alloc(mntns, sg->master_id, 0, sg->s_dev);
+	if (parent)
+		return parent;
+
+	return NULL;
+}
+
+/* Build the mount tree and the sharing-group trees from mntns->mountinfo_list; returns 0 or -1. */
+static int mntns_build_tree(struct mntns_zdtm *mntns)
+{
+	struct mountinfo_topology *topology, *parent, *tree = NULL;
+	struct mountinfo_zdtm *mountinfo;
+	struct sharing_group *sg;
+
+	/* Initialize both lists up front: the err path walks them unconditionally */
+	INIT_LIST_HEAD(&mntns->topology_list);
+	INIT_LIST_HEAD(&mntns->sharing_groups_list);
+
+	/* Prealloc mount tree */
+	list_for_each_entry(mountinfo, &mntns->mountinfo_list, list) {
+		topology = mountinfo_topology_alloc(mntns, mountinfo);
+		if (!topology)
+			goto err;
+	}
+
+	/* Build mount tree */
+	list_for_each_entry(topology, &mntns->topology_list, list) {
+		parent = mountinfo_topology_lookup_parent(mntns, topology);
+		if (!parent) {
+			if (tree) {
+				pr_err("Bad mount tree with two roots %d and %d\n", tree->mountinfo->mnt_id,
+				       topology->mountinfo->mnt_id);
+				goto err;
+			}
+			tree = topology;
+		} else {
+			topology->parent = parent;
+			list_add_tail(&topology->siblings, &parent->children);
+		}
+	}
+	if (!tree) {
+		pr_err("Bad mount tree without a root\n");
+		goto err;
+	}
+	mntns->tree = tree;
+
+	/* Sort mounts by mountpoint */
+	mntns_mt_resort_siblings(mntns);
+
+	/* Prealloc sharing groups: topology_list is in dfs order, so the groups are created in dfs order too */
+	list_for_each_entry(topology, &mntns->topology_list, list) {
+		if (!topology->mountinfo->shared_id && !topology->mountinfo->master_id)
+			continue;
+		sg = sharing_group_find_or_alloc(mntns, topology->mountinfo->shared_id, topology->mountinfo->master_id,
+						 topology->mountinfo->s_dev);
+		if (!sg)
+			goto err;
+		list_add_tail(&topology->sharing_list, &sg->mounts_list);
+		topology->sharing = sg;
+
+		/* Set sharing group topology id to minimal topology id of its mounts */
+		if (sg->topology_id == -1 || topology->topology_id < sg->topology_id)
+			sg->topology_id = topology->topology_id;
+	}
+
+	/* Build sharing group trees */
+	list_for_each_entry(sg, &mntns->sharing_groups_list, list) {
+		if (!sg->master_id)
+			continue;
+		sg->parent = sharing_group_lookup_parent(mntns, sg);
+		if (!sg->parent)
+			goto err;
+		list_add(&sg->siblings, &sg->parent->children);
+	}
+
+	return 0;
+err:
+	mountinfo_topology_free_all(mntns);
+	sharing_group_free_all(mntns);
+	return -1;
+}
+
+static int mountinfo_topology_list_compare(struct mntns_zdtm *mntns_a, struct mntns_zdtm *mntns_b)
+{
+	struct list_head *head_a = &mntns_a->topology_list, *head_b = &mntns_b->topology_list;
+	struct list_head *pos_a, *pos_b;
+
+	/* Walk both topology lists in lock-step; return -1 at the first pair that differs, 0 if none does. */
+	for (pos_a = head_a->next, pos_b = head_b->next; pos_a != head_a && pos_b != head_b;
+	     pos_a = pos_a->next, pos_b = pos_b->next) {
+		struct mountinfo_topology *ta = list_entry(pos_a, struct mountinfo_topology, list);
+		struct mountinfo_topology *tb = list_entry(pos_b, struct mountinfo_topology, list);
+
+		if (ta->topology_id != tb->topology_id) {
+			pr_err("Mounts %d and %d have different topology id %d and %d\n", ta->mountinfo->mnt_id,
+			       tb->mountinfo->mnt_id, ta->topology_id, tb->topology_id);
+			return -1;
+		}
+		/* Parents: either both absent, or both present with equal topology ids. */
+		if (!ta->parent != !tb->parent) {
+			pr_err("One of mounts %d and %d has parent and other doesn't\n", ta->mountinfo->mnt_id,
+			       tb->mountinfo->mnt_id);
+			return -1;
+		}
+		if (ta->parent && ta->parent->topology_id != tb->parent->topology_id) {
+			pr_err("Mounts %d and %d have different parent topology id %d and %d\n",
+			       ta->mountinfo->mnt_id, tb->mountinfo->mnt_id, ta->parent->topology_id,
+			       tb->parent->topology_id);
+			return -1;
+		}
+		/* Sharing groups obey the same rule as parents. */
+		if (!ta->sharing != !tb->sharing) {
+			pr_err("One of mounts %d and %d has sharing and other doesn't\n", ta->mountinfo->mnt_id,
+			       tb->mountinfo->mnt_id);
+			return -1;
+		}
+		if (ta->sharing && ta->sharing->topology_id != tb->sharing->topology_id) {
+			pr_err("Mounts %d and %d have different sharing topology id %d and %d\n",
+			       ta->mountinfo->mnt_id, tb->mountinfo->mnt_id, ta->sharing->topology_id,
+			       tb->sharing->topology_id);
+			return -1;
+		}
+	}
+
+	/* Entries left over on either side mean the two lists differ in length. */
+	if (pos_a != head_a || pos_b != head_b) {
+		pr_err("Mount tree topology length mismatch\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int sharing_group_list_compare(struct mntns_zdtm *mntns_a, struct mntns_zdtm *mntns_b)
+{
+	struct list_head *head_a = &mntns_a->sharing_groups_list, *head_b = &mntns_b->sharing_groups_list;
+	struct list_head *pos_a, *pos_b;
+
+	/* Walk both sharing group lists in lock-step; return -1 at the first pair that differs, 0 if none does. */
+	for (pos_a = head_a->next, pos_b = head_b->next; pos_a != head_a && pos_b != head_b;
+	     pos_a = pos_a->next, pos_b = pos_b->next) {
+		struct sharing_group *ga = list_entry(pos_a, struct sharing_group, list);
+		struct sharing_group *gb = list_entry(pos_b, struct sharing_group, list);
+
+		if (ga->topology_id != gb->topology_id) {
+			pr_err("Sharings (%d,%d) and (%d,%d) have different sharing topology id %d and %d\n",
+			       ga->shared_id, ga->master_id, gb->shared_id, gb->master_id, ga->topology_id,
+			       gb->topology_id);
+			return -1;
+		}
+		/* Parent groups: either both absent, or both present with equal topology ids. */
+		if (!ga->parent != !gb->parent) {
+			pr_err("One of sharings (%d,%d) and (%d,%d) has parent and other doesn't\n", ga->shared_id,
+			       ga->master_id, gb->shared_id, gb->master_id);
+			return -1;
+		}
+		if (ga->parent && ga->parent->topology_id != gb->parent->topology_id) {
+			pr_err("Sharings (%d,%d) and (%d,%d) have different parent topology id %d and %d\n",
+			       ga->shared_id, ga->master_id, gb->shared_id, gb->master_id,
+			       ga->parent->topology_id, gb->parent->topology_id);
+			return -1;
+		}
+	}
+
+	/* Entries left over on either side mean the two lists differ in length. */
+	if (pos_a != head_a || pos_b != head_b) {
+		pr_err("Mount tree sharing topology length mismatch\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Compare the mount tree topology and the sharing group topology of two mount
+ * namespaces. Both topology trees are built here first. Returns 0 when the
+ * namespaces match, -1 when building a tree fails or any difference is found.
+ */
+int mntns_compare(struct mntns_zdtm *mntns_a, struct mntns_zdtm *mntns_b)
+{
+	int ret = -1;
+
+	/* The chain stops at the first failing step, so later steps never run after a failure. */
+	if (mntns_build_tree(mntns_a))
+		pr_err("Failed to build first mountinfo topology tree\n");
+	else if (mntns_build_tree(mntns_b))
+		pr_err("Failed to build second mountinfo topology tree\n");
+	else if (!mountinfo_topology_list_compare(mntns_a, mntns_b) &&
+		 !sharing_group_list_compare(mntns_a, mntns_b))
+		ret = 0;
+
+	return ret;
+}
+
+void mntns_free_all(struct mntns_zdtm *mntns)
+{
+	mountinfo_zdtm_free_all(mntns);
+	mountinfo_topology_free_all(mntns);
+	sharing_group_free_all(mntns);
+}
diff --git a/test/zdtm/lib/mountinfo.h b/test/zdtm/lib/mountinfo.h
new file mode 100644
index 000000000..6d90e2c10
--- /dev/null
+++ b/test/zdtm/lib/mountinfo.h
@@ -0,0 +1,70 @@
+#ifndef __ZDTM_MOUNTINFO__
+#define __ZDTM_MOUNTINFO__
+
+#include "list.h"
+
+struct mountinfo_zdtm {
+	int mnt_id;
+	int parent_mnt_id;
+	char *mountpoint;
+	char *root;
+	unsigned int s_dev;
+	int shared_id;
+	int master_id;
+	char *fstype;
+
+	/* list of all mounts */
+	struct list_head list;
+};
+
+struct mntns_zdtm {
+	struct list_head mountinfo_list;
+	struct list_head topology_list;
+	struct mountinfo_topology *tree;
+	struct list_head sharing_groups_list;
+};
+
+#define MNTNS_ZDTM_INIT(name)                                                    \
+	{                                                                        \
+		.mountinfo_list = LIST_HEAD_INIT(name.mountinfo_list),           \
+		.topology_list = LIST_HEAD_INIT(name.topology_list),             \
+		.sharing_groups_list = LIST_HEAD_INIT(name.sharing_groups_list), \
+	}
+#define MNTNS_ZDTM(name) struct mntns_zdtm name = MNTNS_ZDTM_INIT(name)
+
+struct sharing_group {
+	int shared_id;
+	int master_id;
+	unsigned int s_dev;
+
+	struct sharing_group *parent;
+	struct list_head children;
+	struct list_head siblings;
+
+	int topology_id;
+
+	struct list_head mounts_list;
+
+	struct list_head list;
+};
+
+struct mountinfo_topology {
+	struct mountinfo_zdtm *mountinfo;
+
+	struct mountinfo_topology *parent;
+	struct list_head children;
+	struct list_head siblings;
+
+	int topology_id;
+
+	struct sharing_group *sharing;
+	struct list_head sharing_list;
+
+	struct list_head list;
+};
+
+extern int mntns_parse_mountinfo(struct mntns_zdtm *mntns);
+extern void mntns_free_all(struct mntns_zdtm *mntns);
+extern int mntns_compare(struct mntns_zdtm *mntns_a, struct mntns_zdtm *mntns_b);
+
+#endif
diff --git a/test/zdtm/lib/msg.c b/test/zdtm/lib/msg.c
index 1cf92e3e0..9ba1c47a4 100644
--- a/test/zdtm/lib/msg.c
+++ b/test/zdtm/lib/msg.c
@@ -1,4 +1,5 @@
 #include <stdarg.h>
+#include <inttypes.h>
 #include <errno.h>
 #include <unistd.h>
 #include <fcntl.h>
@@ -55,7 +56,7 @@ void test_msg(const char *format, ...)
 		off += strftime(buf, sizeof(buf), "%H:%M:%S", tm);
 	}
 
-	off += sprintf(buf + off, ".%.3ld: ", tv.tv_usec / 1000);
+	off += sprintf(buf + off, ".%.3" PRId64 ": ", (int64_t)(tv.tv_usec / 1000));
 	off += sprintf(buf + off, "%5d: ", getpid());
 
 skip:
diff --git a/test/zdtm/lib/ns.c b/test/zdtm/lib/ns.c
index 6f6cccc99..822e09c92 100644
--- a/test/zdtm/lib/ns.c
+++ b/test/zdtm/lib/ns.c
@@ -1,6 +1,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
+#include <inttypes.h>
 #include <grp.h>
 #include <string.h>
 #include <errno.h>
@@ -27,8 +28,9 @@ extern int pivot_root(const char *new_root, const char *put_old);
 static int prepare_mntns(void)
 {
 	int dfd, ret;
-	char *root, *criu_path;
+	char *root, *criu_path, *dev_path, *zdtm_bind;
 	char path[PATH_MAX];
+	char bind_path[PATH_MAX];
 
 	root = getenv("ZDTM_ROOT");
 	if (!root) {
@@ -51,6 +53,34 @@ static int prepare_mntns(void)
 		return -1;
 	}
 
+	zdtm_bind = getenv("ZDTM_BIND");
+	if (zdtm_bind) {
+		/*
+		 * Bindmount the directory to itself.
+		 * e.g.: The mnt_ro_root test makes "/" mount readonly, but we
+		 * still want to write logs to /zdtm/static/ so let's make it
+		 * separate writable bind mount.
+		 */
+		snprintf(bind_path, sizeof(bind_path),  "%s/%s", root, zdtm_bind);
+		if (mount(bind_path, bind_path, NULL, MS_BIND, NULL)) {
+			fprintf(stderr, "Can't bind-mount ZDTM_BIND: %m\n");
+			return -1;
+		}
+	}
+
+	dev_path = getenv("ZDTM_DEV");
+	if (dev_path) {
+		snprintf(path, sizeof(path), "%s/dev", root);
+		if (mount(dev_path, path, NULL, MS_BIND, NULL)) {
+			pr_perror("Unable to mount %s",  path);
+			return -1;
+		}
+		if (mount(NULL, path, NULL, MS_PRIVATE, NULL)) {
+			pr_perror("Unable to mount %s",  path);
+			return -1;
+		}
+	}
+
 	criu_path = getenv("ZDTM_CRIU");
 	if (criu_path) {
 		snprintf(path, sizeof(path), "%s%s", root, criu_path);
@@ -218,7 +248,7 @@ static inline int _settime(clockid_t clk_id, time_t offset)
 	if (clk_id == CLOCK_MONOTONIC_COARSE || clk_id == CLOCK_MONOTONIC_RAW)
 		clk_id = CLOCK_MONOTONIC;
 
-	len = snprintf(buf, sizeof(buf), "%d %ld 0", clk_id, offset);
+	len = snprintf(buf, sizeof(buf), "%d %" PRId64 " 0", clk_id, (int64_t)offset);
 
 	fd = open("/proc/self/timens_offsets", O_WRONLY);
 	if (fd < 0) {
diff --git a/test/zdtm/lib/sysctl.c b/test/zdtm/lib/sysctl.c
index 9583ec3df..3b1ebc168 100644
--- a/test/zdtm/lib/sysctl.c
+++ b/test/zdtm/lib/sysctl.c
@@ -3,6 +3,49 @@
 #include "zdtmtst.h"
 #include "sysctl.h"
 
+int sysctl_read_str(const char *name, char *data, size_t size)
+{
+	int fd, ret; /* reads at most size - 1 bytes of @name into @data, NUL-terminated; 0 on success, -1 on error */
+
+	if (!size) { /* no room for the terminating NUL, and size - 1 below would wrap around */
+		pr_err("Empty buffer for reading %s\n", name);
+		return -1;
+	}
+	fd = open(name, O_RDONLY);
+	if (fd < 0) {
+		pr_perror("Can't open %s", name);
+		return -1;
+	}
+	ret = read(fd, data, size - 1);
+	if (ret < 0)
+		pr_perror("Can't read %s", name); /* report before close() can touch errno */
+	else
+		data[ret] = '\0';
+	close(fd);
+	return ret < 0 ? -1 : 0;
+}
+
+/* Write the string @data into the file @name. Returns 0 on success, -1 if open() or write() fails. */
+int sysctl_write_str(const char *name, char *data)
+{
+	int fd = open(name, O_WRONLY);
+	int err = 0;
+
+	if (fd < 0) {
+		pr_perror("Can't open %s", name);
+		return -1;
+	}
+
+	/* Only a negative return counts as failure; a short write is not detected. */
+	if (write(fd, data, strlen(data)) < 0) {
+		pr_perror("Can't write %s into %s", data, name);
+		err = -1;
+	}
+	close(fd);
+
+	return err;
+}
+
 int sysctl_read_int(const char *name, int *data)
 {
 	int fd;
diff --git a/test/zdtm/lib/sysctl.h b/test/zdtm/lib/sysctl.h
index 67129102f..d435bd7e9 100644
--- a/test/zdtm/lib/sysctl.h
+++ b/test/zdtm/lib/sysctl.h
@@ -3,5 +3,7 @@
 
 extern int sysctl_read_int(const char *name, int *data);
 extern int sysctl_write_int(const char *name, int val);
+extern int sysctl_read_str(const char *name, char *data, size_t size);
+extern int sysctl_write_str(const char *name, char *data);
 
 #endif
diff --git a/test/zdtm/lib/test.c b/test/zdtm/lib/test.c
index 57eb42046..95017e42e 100644
--- a/test/zdtm/lib/test.c
+++ b/test/zdtm/lib/test.c
@@ -239,34 +239,37 @@ void test_init(int argc, char **argv)
 		exit(1);
 	}
 
-	val = getenv("ZDTM_GROUPS");
-	if (val) {
-		char *tok = NULL;
-		unsigned int size = 0, groups[NGROUPS_MAX];
+	val = getenv("ZDTM_ROOTLESS");
+	if (!val) {
+		val = getenv("ZDTM_GROUPS");
+		if (val) {
+			char *tok = NULL;
+			unsigned int size = 0, groups[NGROUPS_MAX];
 
-		tok = strtok(val, " ");
-		while (tok) {
-			size++;
-			groups[size - 1] = atoi(tok);
-			tok = strtok(NULL, " ");
+			tok = strtok(val, " ");
+			while (tok) {
+				size++;
+				groups[size - 1] = atoi(tok);
+				tok = strtok(NULL, " ");
+			}
+
+			if (setgroups(size, groups)) {
+				fprintf(stderr, "Can't set groups: %m");
+				exit(1);
+			}
 		}
 
-		if (setgroups(size, groups)) {
-			fprintf(stderr, "Can't set groups: %m");
+		val = getenv("ZDTM_GID");
+		if (val && (setgid(atoi(val)) == -1)) {
+			fprintf(stderr, "Can't set gid: %m");
 			exit(1);
 		}
-	}
 
-	val = getenv("ZDTM_GID");
-	if (val && (setgid(atoi(val)) == -1)) {
-		fprintf(stderr, "Can't set gid: %m");
-		exit(1);
-	}
-
-	val = getenv("ZDTM_UID");
-	if (val && (setuid(atoi(val)) == -1)) {
-		fprintf(stderr, "Can't set gid: %m");
-		exit(1);
+		val = getenv("ZDTM_UID"); /* uid is switched last, after the groups and gid above */
+		if (val && (setuid(atoi(val)) == -1)) {
+			fprintf(stderr, "Can't set uid: %m");
+			exit(1);
+		}
 	}
 
 	if (prctl(PR_SET_DUMPABLE, 1)) {
@@ -403,7 +406,7 @@ pid_t sys_clone_unified(unsigned long flags, void *child_stack, void *parent_tid
 {
 #ifdef __x86_64__
 	return (pid_t)syscall(__NR_clone, flags, child_stack, parent_tid, child_tid, newtls);
-#elif (__i386__ || __arm__ || __aarch64__ || __powerpc64__ || __mips__)
+#elif (__i386__ || __arm__ || __aarch64__ || __powerpc64__ || __mips__ || __loongarch64 || __riscv)
 	return (pid_t)syscall(__NR_clone, flags, child_stack, parent_tid, newtls, child_tid);
 #elif __s390x__
 	return (pid_t)syscall(__NR_clone, child_stack, flags, parent_tid, child_tid, newtls);
diff --git a/test/zdtm/lib/unix.c b/test/zdtm/lib/unix.c
index 49773dedd..288f1df24 100644
--- a/test/zdtm/lib/unix.c
+++ b/test/zdtm/lib/unix.c
@@ -5,7 +5,7 @@
 
 int unix_fill_sock_name(struct sockaddr_un *name, char *relFilename)
 {
-	char *cwd;
+	cleanup_free char *cwd = NULL;
 
 	if (get_cwd_check_perm(&cwd)) {
 		pr_err("failed to get current working directory with valid permissions.\n");
diff --git a/test/zdtm/lib/xmalloc.h b/test/zdtm/lib/xmalloc.h
new file mode 100644
index 000000000..95e0d4043
--- /dev/null
+++ b/test/zdtm/lib/xmalloc.h
@@ -0,0 +1,68 @@
+#ifndef __ZDTM_XMALLOC_H__
+#define __ZDTM_XMALLOC_H__
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef pr_err
+#error "Macro pr_err is needed."
+#endif
+
+#define __xalloc(op, size, ...)                                                           \
+	({                                                                                \
+		void *___p = op(__VA_ARGS__);                                             \
+		if (!___p)                                                                \
+			pr_err("%s: Can't allocate %li bytes\n", __func__, (long)(size)); \
+		___p;                                                                     \
+	})
+
+#define xstrdup(str)	  __xalloc(strdup, strlen(str) + 1, str)
+#define xmalloc(size)	  __xalloc(malloc, size, size)
+#define xzalloc(size)	  __xalloc(calloc, size, 1, size)
+#define xrealloc(p, size) __xalloc(realloc, size, p, size)
+
+#define xfree(p) free(p)
+
+#define xrealloc_safe(pptr, size)                  \
+	({                                         \
+		int __ret = -1;                    \
+		void *new = xrealloc(*pptr, size); \
+		if (new) {                         \
+			*pptr = new;               \
+			__ret = 0;                 \
+		}                                  \
+		__ret;                             \
+	})
+
+#define xmemdup(ptr, size)                      \
+	({                                      \
+		void *new = xmalloc(size);      \
+		if (new)                        \
+			memcpy(new, ptr, size); \
+		new;                            \
+	})
+
+#define memzero_p(p)	 memset(p, 0, sizeof(*p))
+#define memzero(p, size) memset(p, 0, size)
+
+/*
+ * Helper for allocating trees with single xmalloc.
+ * This one advances the void *pointer on s bytes and
+ * returns the previous value. Use like this
+ *
+ * m = xmalloc(total_size);
+ * a = xptr_pull(&m, tree_root_t);
+ * a->b = xptr_pull(&m, leaf_a_t);
+ * a->c = xptr_pull(&m, leaf_c_t);
+ * ...
+ */
+static inline void *xptr_pull_s(void **m, size_t s)
+{
+	void *ret = (*m);
+	(*m) = (char *)(*m) + s; /* step in bytes via char *; arithmetic on void * is only a GNU extension */
+	return ret;
+}
+
+#define xptr_pull(m, type) xptr_pull_s(m, sizeof(type))
+
+#endif /* __ZDTM_XMALLOC_H__ */
diff --git a/test/zdtm/lib/zdtmtst.h b/test/zdtm/lib/zdtmtst.h
index ed7c23ee2..b0e25702e 100644
--- a/test/zdtm/lib/zdtmtst.h
+++ b/test/zdtm/lib/zdtmtst.h
@@ -126,11 +126,25 @@ extern int write_pidfile(int pid);
 /* message helpers */
 extern int test_log_init(const char *outfile, const char *suffix);
 extern int zdtm_seccomp;
-#define pr_err(format, arg...) test_msg("ERR: %s:%d: " format, __FILE__, __LINE__, ##arg)
-#define pr_perror(format, arg...) \
-	test_msg("ERR: %s:%d: " format " (errno = %d (%s))\n", __FILE__, __LINE__, ##arg, errno, strerror(errno))
-#define fail(format, arg...) \
-	test_msg("FAIL: %s:%d: " format " (errno = %d (%s))\n", __FILE__, __LINE__, ##arg, errno, strerror(errno))
+#define pr_err(format, arg...)                                              \
+	({                                                                  \
+		test_msg("ERR: %s:%d: " format, __FILE__, __LINE__, ##arg); \
+		1;                                                          \
+	})
+
+#define pr_perror(format, arg...)                                                                        \
+	({                                                                                               \
+		test_msg("ERR: %s:%d: " format " (errno = %d (%s))\n", __FILE__, __LINE__, ##arg, errno, \
+			 strerror(errno));                                                               \
+		1;                                                                                       \
+	})
+
+#define fail(format, arg...)                                                                              \
+	({                                                                                                \
+		test_msg("FAIL: %s:%d: " format " (errno = %d (%s))\n", __FILE__, __LINE__, ##arg, errno, \
+			 strerror(errno));                                                                \
+		1;                                                                                        \
+	})
 #define skip(format, arg...) test_msg("SKIP: %s:%d: " format "\n", __FILE__, __LINE__, ##arg)
 #define pass()		     test_msg("PASS\n")
 
@@ -202,4 +216,13 @@ static inline void cleanup_closep(void *p)
 		TEMP_FAILURE_RETRY(close(*pp));
 }
 
+extern int write_value(const char *path, const char *value);
+extern int read_value(const char *path, char *value, int size);
+
+#define container_of(ptr, type, member)                            \
+	({                                                         \
+		const typeof(((type *)0)->member) *__mptr = (ptr); \
+		(type *)((char *)__mptr - offsetof(type, member)); \
+	})
+
 #endif /* _VIMITESU_H_ */
diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile
index 4a93659d4..e1df2e5fa 100644
--- a/test/zdtm/static/Makefile
+++ b/test/zdtm/static/Makefile
@@ -8,6 +8,7 @@ TST_NOFILE	:=				\
 		sleeping00			\
 		pid00				\
 		caps00				\
+		caps01				\
 		wait00				\
 		zombie00			\
 		zombie01			\
@@ -23,6 +24,7 @@ TST_NOFILE	:=				\
 		sse20				\
 		mprotect00			\
 		timers				\
+		timers01			\
 		timerfd				\
 		unbound_sock			\
 		sched_prio00			\
@@ -35,6 +37,8 @@ TST_NOFILE	:=				\
 		socket_udp-corked		\
 		socket6_udp			\
 		socket_udp_shutdown		\
+		socket_icmp			\
+		socket6_icmp			\
 		sk-freebind			\
 		sk-freebind-false		\
 		socket_udplite			\
@@ -53,15 +57,24 @@ TST_NOFILE	:=				\
 		shm				\
 		shm-mp				\
 		ptrace_sig			\
+		pidfd_self			\
+		pidfd_of_thread			\
+		pidfd_dead			\
+		pidfd_diffdead			\
+		pidfd_child			\
+		pidfd_kill			\
+		fd_from_pidfd			\
 		pipe00				\
 		pipe01				\
 		pipe02				\
 		pthread00			\
+		pthread00-pac			\
 		pthread01			\
 		pthread02			\
 		pthread_timers			\
 		pthread_timers_h		\
 		rseq00				\
+		membarrier			\
 		vdso00				\
 		vdso01				\
 		vdso02				\
@@ -84,7 +97,8 @@ TST_NOFILE	:=				\
 		socket-tcp4v6			\
 		socket-tcp-local		\
 		socket-tcp-reuseport		\
-		socket-tcp-nfconntrack		\
+		socket-tcp-ipt-nfconntrack	\
+		socket-tcp-nft-nfconntrack	\
 		socket-tcp6-local		\
 		socket-tcp4v6-local		\
 		socket-tcpbuf			\
@@ -123,6 +137,10 @@ TST_NOFILE	:=				\
 		sock_opts00			\
 		sock_opts01			\
 		sock_opts02			\
+		sock_ip_opts00			\
+		sock_ip_opts01			\
+		sock_tcp_opts00			\
+		sock_tcp_opts01			\
 		sk-unix-unconn			\
 		sk-unix-unconn-seqpacket	\
 		ipc_namespace			\
@@ -134,6 +152,7 @@ TST_NOFILE	:=				\
 		maps05				\
 		maps09				\
 		maps10				\
+		maps11				\
 		mlock_setuid			\
 		xids00				\
 		groups				\
@@ -184,6 +203,8 @@ TST_NOFILE	:=				\
 		stopped01			\
 		stopped02			\
 		stopped12			\
+		stopped03			\
+		stopped04			\
 		rtc				\
 		clean_mntns			\
 		mntns_rw_ro_rw			\
@@ -199,6 +220,7 @@ TST_NOFILE	:=				\
 		scm04				\
 		scm05				\
 		scm06				\
+		scm09				\
 		aio00				\
 		aio01				\
 		fd				\
@@ -210,6 +232,7 @@ TST_NOFILE	:=				\
 		seccomp_filter_tsync			\
 		seccomp_filter_threads			\
 		seccomp_filter_inheritance		\
+		seccomp_no_new_privs		\
 		different_creds			\
 		vsx				\
 		bridge				\
@@ -251,6 +274,8 @@ TST_NOFILE	:=				\
 		memfd02				\
 		memfd02-hugetlb			\
 		memfd03				\
+		memfd04				\
+		memfd05				\
 		shmemfd				\
 		shmemfd-priv			\
 		time				\
@@ -260,19 +285,24 @@ TST_NOFILE	:=				\
 		sigtrap				\
 		sigtrap01			\
 		change_mnt_context		\
+		fd_offset			\
 #		jobctl00			\
 
 PKG_CONFIG ?= pkg-config
 pkg-config-check = $(shell sh -c '$(PKG_CONFIG) $(1) && echo y')
+pkg-config-atleast-version = $(shell sh -c '$(PKG_CONFIG) --atleast-version=$(2) $(1) && echo y')
 ifeq ($(call pkg-config-check,libbpf),y)
 TST_NOFILE	+=				\
 		bpf_hash			\
-		bpf_array			
+		bpf_array
 endif
 
 ifneq ($(ARCH),arm)
 ifneq ($(COMPAT_TEST),y)
-        TST_NOFILE += maps03
+	TST_NOFILE += maps03
+ifeq ($(call pkg-config-atleast-version,libtracefs,1.7),y)
+	TST_NOFILE += uprobes
+endif
 endif
 endif
 
@@ -289,6 +319,7 @@ TST_FILE	=				\
 		write_read02			\
 		write_read10			\
 		maps00				\
+		maps12				\
 		link10				\
 		file_attr			\
 		deleted_unix_sock		\
@@ -303,6 +334,10 @@ TST_FILE	=				\
 		ghost_holes00			\
 		ghost_holes01			\
 		ghost_holes02			\
+		ghost_holes_large00     \
+		ghost_holes_large01     \
+		ghost_multi_hole00      \
+		ghost_multi_hole01      \
 		unlink_largefile		\
 		mtime_mmap			\
 		fifo				\
@@ -347,6 +382,12 @@ TST_FILE	=				\
 		socket_close_data01		\
 		fifo_upon_unix_socket00		\
 		fifo_upon_unix_socket01		\
+		sk-unix-listen01		\
+		sk-unix-listen02		\
+		sk-unix-listen03		\
+		sk-unix-listen04		\
+		sk-unix-restore-fs-share	\
+		mnt_ext_file_bind_auto		\
 
 TST_DIR		=				\
 		cwd00				\
@@ -377,10 +418,13 @@ TST_DIR		=				\
 		cgroup02			\
 		cgroup03			\
 		cgroup04			\
+		cgroupv2_00			\
+		cgroupv2_01			\
 		cgroup_ifpriomap		\
 		cgroup_ignore			\
 		cgroup_stray			\
 		cgroup_yard			\
+		cgroup_threads			\
 		unlink_fstat04			\
 		unlink_fstat041			\
 		mntns_remap			\
@@ -389,6 +433,7 @@ TST_DIR		=				\
 		mntns_ghost			\
 		mntns_ghost01			\
 		mntns_ro_root			\
+		mnt_ro_root			\
 		mntns_link_ghost		\
 		mntns_shared_bind		\
 		mntns_shared_bind02		\
@@ -464,35 +509,41 @@ STATE_OUT	= $(TST_STATE:%=%.out)
 
 include ../Makefile.inc
 
+ifeq ($(ARCH),aarch64)
+	PAC_CFLAGS := -mbranch-protection=standard
+else
+	PAC_CFLAGS :=
+endif
+
 all:	$(TST) criu-rtc.so
 install: all
 .PHONY: all install
 
 $(TST_NOFILE:%=%.pid):	%.pid:	%
-	$(<D)/$(<F) --pidfile=$@ --outfile=$<.out
+	$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out
 
 $(TST_FILE:%=%.pid):	%.pid:	%
-	$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --filename=$<.test
+	$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --filename=$<.test
 
 $(TST_DIR:%=%.pid):	%.pid:	%
-	$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --dirname=$<.test
+	$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --dirname=$<.test
 
 $(TST_DIR_FILE:%=%.pid):	%.pid:	%
-	$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --dirname=$<.dir.test --filename=$<.test
+	$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --dirname=$<.dir.test --filename=$<.test
 
 cmdlinenv00.pid: cmdlinenv00
-	$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --arg1=arg1 --arg2=arg2 --arg3=arg3
+	$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --arg1=arg1 --arg2=arg2 --arg3=arg3
 
 shm-unaligned.pid: shm-unaligned
-	$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --shmem_size=5000
+	$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --shmem_size=5000
 
 shm-hugetlb.pid: shm-hugetlb
-	$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --shmem_size=4194304
+	$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --shmem_size=4194304
 
 env00.pid:	env00
-	$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --envname=ENV_00_TEST
+	$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --envname=ENV_00_TEST
 umask00.pid:	umask00
-	$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --mask=0345
+	$(TEST_ENV) $(<D)/$(<F) --pidfile=$@ --outfile=$<.out --mask=0345
 
 fifo-rowo-pair.pid: fifo-rowo-pair
 	$(<D)/$(<F) --pidfile=$@ --outfile=$<.out --name_master=$<.master.test --name_slave=$<.slave.test
@@ -555,6 +606,8 @@ uptime_grow:		LDLIBS += -lrt -pthread
 unlink_largefile:	CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE
 inotify_system_nodel:	CFLAGS += -DNO_DEL
 pthread00:		LDLIBS += -pthread
+pthread00-pac:		CFLAGS += ${PAC_CFLAGS}
+pthread00-pac:		LDLIBS += -pthread
 pthread01:		LDLIBS += -pthread
 pthread02:		LDLIBS += -pthread
 pthread_timers:		LDLIBS += -lrt -pthread
@@ -578,13 +631,17 @@ socket-tcpbuf6-local:	CFLAGS += -D ZDTM_TCP_LOCAL -D ZDTM_IPV6
 socket-tcp6-local:	CFLAGS += -D ZDTM_TCP_LOCAL -D ZDTM_IPV6
 socket-tcp4v6-local:	CFLAGS += -D ZDTM_TCP_LOCAL -D ZDTM_IPV4V6
 socket-tcp-local:	CFLAGS += -D ZDTM_TCP_LOCAL
-socket-tcp-nfconntrack: CFLAGS += -D ZDTM_TCP_LOCAL -DZDTM_CONNTRACK
+socket-tcp-ipt-nfconntrack: CFLAGS += -D ZDTM_TCP_LOCAL -DZDTM_IPT_CONNTRACK
+socket-tcp-nft-nfconntrack: CFLAGS += -D ZDTM_TCP_LOCAL -DZDTM_NFT_CONNTRACK
 socket_listen6:		CFLAGS += -D ZDTM_IPV6
 socket_listen4v6:	CFLAGS += -D ZDTM_IPV4V6
 socket-tcp6-closed:	CFLAGS += -D ZDTM_IPV6
 socket-tcp6-closed:	CFLAGS += -D ZDTM_IPV4V6
 socket-tcp-closed-last-ack:	CFLAGS += -D ZDTM_TCP_LAST_ACK
 socket-tcp-skip-in-flight:	CFLAGS += -D ZDTM_IPV4V6
+socket6_icmp:		CFLAGS += -DZDTM_IPV6
+sock_ip_opts01:		CFLAGS += -DZDTM_VAL_ZERO
+sock_tcp_opts01:	CFLAGS += -DZDTM_VAL_ZERO
 tun_ns:			CFLAGS += -DTUN_NS
 mnt_ext_manual:		CFLAGS += -D ZDTM_EXTMAP_MANUAL
 mntns_pivot_root_ro:	CFLAGS += -DMNTNS_PIVOT_ROOT_RO
@@ -593,6 +650,7 @@ vdso01:			LDLIBS += -lrt
 scm01:			CFLAGS += -DKEEP_SENT_FD
 scm02:			CFLAGS += -DSEND_BOTH
 scm04:			CFLAGS += -DSEPARATE
+scm09:			CFLAGS += -DCLOSE_SENDER_FD
 mntns_link_remap:	CFLAGS += -DZDTM_LINK_REMAP
 mntns_shared_bind02:	CFLAGS += -DSHARED_BIND02
 mntns_root_bind02:	CFLAGS += -DROOT_BIND02
@@ -605,6 +663,7 @@ unlink_fstat04:		CFLAGS += -DUNLINK_FSTAT04
 unlink_fstat041:		CFLAGS += -DUNLINK_FSTAT041 -DUNLINK_FSTAT04
 ghost_holes01:		CFLAGS += -DTAIL_HOLE
 ghost_holes02:		CFLAGS += -DHEAD_HOLE
+ghost_holes_large01:		CFLAGS += -DLIMIT
 sk-freebind-false:	CFLAGS += -DZDTM_FREEBIND_FALSE
 selinux02:		CFLAGS += -DUSING_SOCKCREATE
 stopped01:		CFLAGS += -DZDTM_STOPPED_KILL
@@ -635,6 +694,7 @@ socket-tcp6-unconn:	CFLAGS += -D ZDTM_IPV6
 socket-tcp4v6-last-ack:	CFLAGS += -D ZDTM_TCP_LAST_ACK -D ZDTM_IPV4V6
 socket-tcp4v6-closing:	CFLAGS += -D ZDTM_IPV4V6
 memfd02-hugetlb:	CFLAGS += -D ZDTM_HUGETLB
+memfd05:		CFLAGS += -D ZDTM_MEMFD05
 
 sockets00-seqpacket:	CFLAGS += -D ZDTM_UNIX_SEQPACKET
 sockets01-seqpacket:	CFLAGS += -D ZDTM_UNIX_SEQPACKET
@@ -658,11 +718,22 @@ s390x_gs_threads:	LDFLAGS += -pthread
 
 thread_different_uid_gid:	LDLIBS += -pthread -lcap
 
+cgroup_threads:		LDFLAGS += -pthread
+
 bpf_hash:		LDLIBS += -lbpf
 bpf_array:		LDLIBS += -lbpf
 
 fifo_upon_unix_socket01:	CFLAGS += -DFIFO_UPON_UNIX01
 
+sk-unix-listen02: CFLAGS += -DSK_UNIX_LISTEN02
+sk-unix-listen03: CFLAGS += -DSK_UNIX_LISTEN03
+sk-unix-listen04: CFLAGS += -DSK_UNIX_LISTEN02 -DSK_UNIX_LISTEN03
+
+cgroupv2_01:		LDLIBS += -pthread
+
+uprobes:		CFLAGS += $(call pkg-cflags, libtracefs libtraceevent)
+uprobes:		LDLIBS += $(call pkg-libs, libtracefs libelf)
+
 $(LIB):	force
 	$(Q) $(MAKE) -C $(LIBDIR)
 
@@ -677,7 +748,7 @@ criu-rtc.pb-c.c: criu-rtc.proto
 	$(Q)echo $@ >> .gitignore
 	$(Q)echo $(@:%.c=%.h) >> .gitignore
 	$(E) " PBCC     " $@
-	$(Q)protoc-c --proto_path=. --c_out=. criu-rtc.proto
+	$(Q)protoc --proto_path=. --c_out=. criu-rtc.proto
 
 criu-rtc.so: criu-rtc.c criu-rtc.pb-c.c
 	$(E) " LD       " $@
diff --git a/test/zdtm/static/apparmor.c b/test/zdtm/static/apparmor.c
index 713ffaa46..dc1636821 100644
--- a/test/zdtm/static/apparmor.c
+++ b/test/zdtm/static/apparmor.c
@@ -59,7 +59,7 @@ int checkprofile(void)
 		return -1;
 	}
 
-	len = fscanf(f, "%[^ \n]s", profile);
+	len = fscanf(f, "%1023[^ \n]s", profile);
 	fclose(f);
 	if (len != 1) {
 		fail("wrong number of items scanned %d", len);
diff --git a/test/zdtm/static/apparmor_stacking.c b/test/zdtm/static/apparmor_stacking.c
index 76de8b8b4..0bc36048c 100644
--- a/test/zdtm/static/apparmor_stacking.c
+++ b/test/zdtm/static/apparmor_stacking.c
@@ -56,7 +56,7 @@ static int checkprofile(pid_t pid, char *expected)
 		return -1;
 	}
 
-	len = fscanf(f, "%[^ \n]s", profile);
+	len = fscanf(f, "%1023[^ \n]s", profile);
 	fclose(f);
 	if (len != 1) {
 		fail("wrong number of items scanned %d", len);
diff --git a/test/zdtm/static/caps01.c b/test/zdtm/static/caps01.c
new file mode 100644
index 000000000..0f8a7101e
--- /dev/null
+++ b/test/zdtm/static/caps01.c
@@ -0,0 +1,168 @@
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/prctl.h>
+#include <linux/capability.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Check that CapAmb are preserved";
+const char *test_author = "Liu Chao <liuchao173@huawei.com>";
+
+struct cap_hdr {
+	unsigned int version;
+	int pid;
+};
+
+struct cap_data {
+	unsigned int eff;
+	unsigned int prm;
+	unsigned int inh;
+};
+
+#define _LINUX_CAPABILITY_VERSION_3 0x20080522
+#define _LINUX_CAPABILITY_U32S_3    2
+#define CAP_DAC_OVERRIDE	    1
+#define PR_CAP_AMBIENT		    47
+#define PR_CAP_AMBIENT_IS_SET	    1
+#define PR_CAP_AMBIENT_RAISE	    2
+#define PR_CAP_AMBIENT_LOWER	    3
+
+int capget(struct cap_hdr *hdrp, struct cap_data *datap);
+int capset(struct cap_hdr *hdrp, const struct cap_data *datap);
+
+static int cap_last_cap = 63;
+
+/* Child: make ambient caps mirror the permitted set, wait for the parent, re-read them, report via pipe. */
+int main(int argc, char **argv)
+{
+	task_waiter_t t;
+	int pid, result_pipe[2];
+	unsigned int amb[_LINUX_CAPABILITY_U32S_3], amb_2[_LINUX_CAPABILITY_U32S_3] = { 0 };
+	char res = 'x';
+	FILE *f;
+
+	test_init(argc, argv);
+	task_waiter_init(&t);
+
+	f = fopen("/proc/sys/kernel/cap_last_cap", "r");
+	if (f) {
+		if (fscanf(f, "%d", &cap_last_cap) != 1) {
+			pr_perror("Unable to read cap_last_cap");
+			fclose(f);
+			return 1;
+		}
+		fclose(f);
+	} else
+		test_msg("/proc/sys/kernel/cap_last_cap is not available\n");
+
+	if (pipe(result_pipe)) {
+		pr_perror("Can't create pipe");
+		return 1;
+	}
+
+	pid = test_fork();
+	if (pid == 0) {
+		int b, i, ret;
+		struct cap_hdr hdr;
+		struct cap_data data[_LINUX_CAPABILITY_U32S_3];
+
+		hdr.version = _LINUX_CAPABILITY_VERSION_3;
+		hdr.pid = 0;
+
+		if (capget(&hdr, data) < 0) {
+			pr_perror("capget");
+			return -1;
+		}
+
+		hdr.version = _LINUX_CAPABILITY_VERSION_3;
+		hdr.pid = 0;
+
+		data[0].eff &= ~((1 << CAP_CHOWN) | (1 << CAP_DAC_OVERRIDE));
+		data[0].prm &= ~(1 << CAP_DAC_OVERRIDE);
+		data[0].inh = data[0].prm; /* inheritable := permitted in both words */
+		data[1].inh = data[1].prm;
+
+		if (capset(&hdr, data) < 0) {
+			pr_perror("capset");
+			return -1;
+		}
+
+		for (b = 0; b < _LINUX_CAPABILITY_U32S_3; b++) {
+			amb[b] = data[b].prm; /* the ambient set we expect to read back later */
+			for (i = 0; i < 32; i++) {
+				if (b * 32 + i > cap_last_cap)
+					break;
+				if (amb[b] & (1U << i)) /* 1U: a signed 1 << 31 would overflow int */
+					ret = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i + b * 32, 0, 0);
+				else
+					ret = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_LOWER, i + b * 32, 0, 0);
+				if (ret) {
+					pr_perror("Unable to set ambient capability %d to %d: %d", i + b * 32, amb[b] & (1U << i), ret);
+					return -1;
+				}
+			}
+		}
+
+		task_waiter_complete_current(&t);
+		task_waiter_wait4(&t, getppid());
+
+		for (b = 0; b < _LINUX_CAPABILITY_U32S_3; b++) {
+			amb_2[b] = 0;
+			for (i = 0; i < 32; i++) {
+				if (b * 32 + i > cap_last_cap)
+					break;
+				ret = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, i + b * 32, 0, 0);
+				if (ret < 0) {
+					pr_perror("Unable to read ambient capability %d: %d", i + b * 32, ret);
+					goto bad;
+				}
+
+				amb_2[b] |= ((unsigned int)ret << i); /* unsigned shift, same reason as above */
+			}
+		}
+
+		for (b = 0; b < _LINUX_CAPABILITY_U32S_3; b++) {
+			if (amb[b] != amb_2[b]) {
+				res = '1';
+				goto bad;
+			}
+		}
+
+		res = '0';
+	bad:
+		write(result_pipe[1], &res, 1);
+
+		if (res != '0') {
+			write(result_pipe[1], amb, sizeof(amb));
+			write(result_pipe[1], amb_2, sizeof(amb_2));
+		}
+
+		close(result_pipe[0]);
+		close(result_pipe[1]);
+		_exit(0);
+	}
+
+	task_waiter_wait4(&t, pid);
+
+	test_daemon();
+	test_waitsig();
+
+	task_waiter_complete_current(&t);
+
+	read(result_pipe[0], &res, 1);
+
+	if (res == '0')
+		pass();
+	else {
+		read(result_pipe[0], amb, sizeof(amb));
+		read(result_pipe[0], amb_2, sizeof(amb_2));
+		test_msg("amb[]=%08x, %08x\n", amb[0], amb[1]);
+		test_msg("amb[]=%08x, %08x\n", amb_2[0], amb_2[1]);
+		fail("Fail: %c", res);
+	}
+	close(result_pipe[0]);
+	close(result_pipe[1]);
+
+	return 0;
+}
diff --git a/test/zdtm/static/caps01.desc b/test/zdtm/static/caps01.desc
new file mode 100644
index 000000000..2eac7e654
--- /dev/null
+++ b/test/zdtm/static/caps01.desc
@@ -0,0 +1 @@
+{'flags': 'suid'}
diff --git a/test/zdtm/static/cgroup00.desc b/test/zdtm/static/cgroup00.desc
index 3c6c4a7e2..42a3f2b73 100644
--- a/test/zdtm/static/cgroup00.desc
+++ b/test/zdtm/static/cgroup00.desc
@@ -1 +1 @@
-{'flavor': 'h', 'flags': 'suid', 'opts': '--manage-cgroups'}
+{'flavor': 'h', 'flags': 'suid excl', 'opts': '--manage-cgroups'}
diff --git a/test/zdtm/static/cgroup01.c b/test/zdtm/static/cgroup01.c
index bc8515264..7bfb67762 100644
--- a/test/zdtm/static/cgroup01.c
+++ b/test/zdtm/static/cgroup01.c
@@ -79,7 +79,7 @@ int main(int argc, char **argv)
 		if (!s)
 			continue;
 
-		sscanf(paux, "%*d %*d %*d:%*d %*s %s", aux);
+		sscanf(paux, "%*d %*d %*d:%*d %*s %1023s", aux);
 		test_msg("found cgroup at %s\n", aux);
 
 		for (i = 0; i < 2; i++) {
diff --git a/test/zdtm/static/cgroup01.desc b/test/zdtm/static/cgroup01.desc
index 3c6c4a7e2..42a3f2b73 100644
--- a/test/zdtm/static/cgroup01.desc
+++ b/test/zdtm/static/cgroup01.desc
@@ -1 +1 @@
-{'flavor': 'h', 'flags': 'suid', 'opts': '--manage-cgroups'}
+{'flavor': 'h', 'flags': 'suid excl', 'opts': '--manage-cgroups'}
diff --git a/test/zdtm/static/cgroup02.c b/test/zdtm/static/cgroup02.c
index 6229a8a08..8a925c0a4 100644
--- a/test/zdtm/static/cgroup02.c
+++ b/test/zdtm/static/cgroup02.c
@@ -75,7 +75,7 @@ bool test_exists(char *mountinfo_line, char *path)
 	char aux[1024], paux[1024];
 	struct stat st;
 
-	sscanf(mountinfo_line, "%*d %*d %*d:%*d %*s %s", aux);
+	sscanf(mountinfo_line, "%*d %*d %*d:%*d %*s %1023s", aux);
 	test_msg("found cgroup at %s\n", aux);
 
 	ssprintf(paux, "%s/%s", aux, path);
diff --git a/test/zdtm/static/cgroup02.desc b/test/zdtm/static/cgroup02.desc
index df17a5789..eb5a9dd37 100644
--- a/test/zdtm/static/cgroup02.desc
+++ b/test/zdtm/static/cgroup02.desc
@@ -1,4 +1,4 @@
 {   'dopts': '--manage-cgroups --cgroup-root name=zdtmtst:/prefix',
-    'flags': 'suid',
+    'flags': 'suid excl',
     'flavor': 'h',
     'ropts': '--manage-cgroups --cgroup-root /newroot --cgroup-root name=zdtmtst:/prefix'}
diff --git a/test/zdtm/static/cgroup04.c b/test/zdtm/static/cgroup04.c
index 5a424be12..f586a0628 100644
--- a/test/zdtm/static/cgroup04.c
+++ b/test/zdtm/static/cgroup04.c
@@ -17,45 +17,25 @@ const char *test_author = "Tycho Andersen <tycho.andersen@canonical.com>";
 
 char *dirname;
 TEST_OPTION(dirname, string, "cgroup directory name", 1);
-static const char *cgname = "zdtmtst";
-
-int write_value(const char *path, const char *value)
-{
-	int fd, l;
-
-	fd = open(path, O_WRONLY);
-	if (fd < 0) {
-		pr_perror("open %s", path);
-		return -1;
-	}
-
-	l = write(fd, value, strlen(value));
-	close(fd);
-	if (l < 0) {
-		pr_perror("failed to write %s to %s", value, path);
-		return -1;
-	}
-
-	return 0;
-}
+static const char *const cgname = "zdtmtst";
 
 int mount_and_add(const char *controller, const char *path, const char *prop, const char *value)
 {
 	char aux[1024], paux[1024], subdir[1024];
 
 	if (mkdir(dirname, 0700) < 0 && errno != EEXIST) {
-		pr_perror("Can't make dir");
+		pr_perror("Can't make dir %s", dirname);
 		return -1;
 	}
 
 	sprintf(subdir, "%s/%s", dirname, controller);
 	if (mkdir(subdir, 0700) < 0) {
-		pr_perror("Can't make dir");
+		pr_perror("Can't make dir %s", subdir);
 		return -1;
 	}
 
 	if (mount("none", subdir, "cgroup", 0, controller)) {
-		pr_perror("Can't mount cgroups");
+		pr_perror("Can't mount cgroup controller %s at %s", controller, subdir);
 		goto err_rd;
 	}
 
@@ -72,7 +52,8 @@ int mount_and_add(const char *controller, const char *path, const char *prop, co
 		goto err_rs;
 
 	ssprintf(paux, "%s/%s/special_prop_check", subdir, path);
-	mkdir(paux, 0600);
+	if (mkdir(paux, 0600) < 0)
+		pr_perror("Can't make dir %s", paux);
 
 	return 0;
 err_rs:
@@ -94,11 +75,11 @@ bool checkval(char *path, char *val)
 	}
 
 	n = read(fd, buf, sizeof(buf) - 1);
+	if (n < 0)
+		pr_perror("read %s", path);
 	close(fd);
-	if (n < 0) {
-		pr_perror("read");
+	if (n < 0)
 		return false;
-	}
 	buf[n] = 0;
 
 	if (strcmp(val, buf)) {
@@ -115,7 +96,7 @@ int main(int argc, char **argv)
 	char buf[1024], path[PATH_MAX];
 	struct stat sb;
 
-	char *dev_allow[] = {
+	const char *const dev_allow[] = {
 		"c *:* m",   "b *:* m",	  "c 1:3 rwm", "c 1:5 rwm",   "c 1:7 rwm",    "c 5:0 rwm",
 		"c 5:2 rwm", "c 1:8 rwm", "c 1:9 rwm", "c 136:* rwm", "c 10:229 rwm",
 	};
@@ -146,12 +127,14 @@ int main(int argc, char **argv)
 
 	sprintf(path, "%s/devices/%s/devices.list", dirname, cgname);
 	if (!checkval(path, buf)) {
+		errno = 0;
 		fail();
 		goto out;
 	}
 
 	sprintf(path, "%s/memory/%s/memory.limit_in_bytes", dirname, cgname);
 	if (!checkval(path, "268435456\n")) {
+		errno = 0;
 		fail();
 		goto out;
 	}
@@ -163,6 +146,7 @@ int main(int argc, char **argv)
 	}
 
 	if (!S_ISDIR(sb.st_mode)) {
+		errno = 0;
 		fail("special_prop_check not a directory?");
 		goto out;
 	}
diff --git a/test/zdtm/static/cgroup04.checkskip b/test/zdtm/static/cgroup04.checkskip
index 205f8fc53..1ccbada4d 100755
--- a/test/zdtm/static/cgroup04.checkskip
+++ b/test/zdtm/static/cgroup04.checkskip
@@ -1,3 +1,20 @@
 #!/bin/bash
+set -e
 
-! test -f /sys/fs/cgroup/cgroup.controllers
+test ! -f /sys/fs/cgroup/cgroup.controllers
+
+for ctl in devices memory; do
+	# Check that the controller is available.
+
+	grep -q "^${ctl}\\s" /proc/cgroups
+
+	# Check that the controller is not co-mounted with any other.
+
+	# /proc/self/cgroup may have:
+	# "1:devices:/sys"
+	if ! grep -q "^[0-9]*:${ctl}:" /proc/self/cgroup; then
+		# but not eg:
+		# "1:devices,job:/sys"
+		grep -qE "^[0-9]*:([^:]*,)?${ctl}(,[^:]*)?:" /proc/self/cgroup && exit 1
+	fi
+done
diff --git a/test/zdtm/static/cgroup_ifpriomap.checkskip b/test/zdtm/static/cgroup_ifpriomap.checkskip
index 205f8fc53..f401ad1b2 100755
--- a/test/zdtm/static/cgroup_ifpriomap.checkskip
+++ b/test/zdtm/static/cgroup_ifpriomap.checkskip
@@ -1,3 +1,6 @@
 #!/bin/bash
+set -e
 
-! test -f /sys/fs/cgroup/cgroup.controllers
+test ! -f /sys/fs/cgroup/cgroup.controllers
+
+grep -q '^net_prio\s' /proc/cgroups
diff --git a/test/zdtm/static/cgroup_stray.c b/test/zdtm/static/cgroup_stray.c
index 0c0ed93cf..f5754410f 100644
--- a/test/zdtm/static/cgroup_stray.c
+++ b/test/zdtm/static/cgroup_stray.c
@@ -135,7 +135,7 @@ out:
 int main(int argc, char **argv)
 {
 	int ret = -1, sk_pair[2], sk, status;
-	char path[PATH_MAX], c;
+	char path[PATH_MAX], c = 0;
 	pid_t pid = 0;
 
 	test_init(argc, argv);
diff --git a/test/zdtm/static/cgroup_threads.c b/test/zdtm/static/cgroup_threads.c
new file mode 100644
index 000000000..2c17e13a7
--- /dev/null
+++ b/test/zdtm/static/cgroup_threads.c
@@ -0,0 +1,184 @@
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include "zdtmtst.h"
+
+const char *test_doc = "Check that cgroup layout of threads is preserved";
+const char *test_author = "Michał Cłapiński <mclapinski@google.com>";
+
+char *dirname;
+TEST_OPTION(dirname, string, "cgroup directory name", 1);
+static const char *cgname = "zdtmtst";
+#define SUBNAME	 "subcg_threads"
+#define SUBNAME2 SUBNAME "/subsubcg"
+
+#define exit_group(code) syscall(__NR_exit_group, code)
+
+/* Create cgroup @name under dirname and write "0" to its tasks file to move the caller there. */
+static int cg_move(char *name)
+{
+	int cgfd, l;
+	char paux[256];
+
+	/* NOTE(review): ssprintf() as in the sibling cgroup tests; assumed to check sizeof(paux) */
+	ssprintf(paux, "%s/%s", dirname, name);
+	if (mkdir(paux, 0600)) {
+		pr_perror("Can't create %s", paux);
+		return -1;
+	}
+
+	ssprintf(paux, "%s/%s/tasks", dirname, name);
+	cgfd = open(paux, O_WRONLY);
+	if (cgfd < 0) {
+		pr_perror("Can't open %s", paux);
+		return -1;
+	}
+
+	l = write(cgfd, "0", 2);
+	close(cgfd);
+	if (l < 0) {
+		pr_perror("Can't move self to subcg %s", name);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Return 0 if /proc/thread-self/cgroup has a "name=<cgname>:/<name>" entry, -1 otherwise. */
+static int cg_check(char *name)
+{
+	int found = 0;
+	FILE *cgf;
+	char paux[256], aux[128];
+
+	cgf = fopen("/proc/thread-self/cgroup", "r");
+	if (cgf == NULL)
+		return -1;
+	ssprintf(aux, "name=%s:/%s", cgname, name);
+	while (fgets(paux, sizeof(paux), cgf)) {
+		char *s = strchr(paux, ':');
+
+		if (!s) /* no "id:" prefix on this line, nothing to compare */
+			continue;
+		s++;
+		s[strcspn(s, "\n")] = '\0'; /* strip the newline only if there is one */
+		test_msg("CMP [%s] vs [%s]\n", s, aux);
+		if (!strcmp(s, aux)) {
+			found = 1;
+			break;
+		}
+	}
+	fclose(cgf);
+	return found ? 0 : -1;
+}
+
+int th_sync[2], rst_sync[2];
+
+void *thread_fn(void *args)
+{
+	int status = cg_move(SUBNAME2);
+
+	if (write(th_sync[1], &status, sizeof(status)) != sizeof(status)) {
+		pr_perror("write");
+		exit_group(1);
+	}
+
+	if (status == 0) {
+		if (read(rst_sync[0], &status, sizeof(status)) < 0) {
+			pr_perror("read");
+			exit_group(1);
+		}
+
+		status = cg_check(SUBNAME2);
+		if (write(th_sync[1], &status, sizeof(status)) != sizeof(status)) {
+			pr_perror("write");
+			exit_group(1);
+		}
+	}
+
+	pthread_exit(0);
+}
+
+/* Main thread joins SUBNAME, a second thread joins SUBNAME2 and re-checks its cgroup after restore. */
+int main(int argc, char **argv)
+{
+	int status, exit_code = 1;
+	pthread_t thread;
+	char aux[64];
+
+	test_init(argc, argv);
+
+	/*
+	 * Pipe to talk to the kid: first it reports that it's ready (int),
+	 * then it reports the restore status (int).
+	 */
+	if (pipe(th_sync)) {
+		pr_perror("pipe");
+		return 1;
+	}
+
+	/* "Restore happened" pipe */
+	if (pipe(rst_sync)) {
+		pr_perror("pipe");
+		return 1;
+	}
+
+	if (mkdir(dirname, 0700) < 0) {
+		pr_perror("Can't make dir");
+		goto out;
+	}
+
+	sprintf(aux, "none,name=%s", cgname);
+	if (mount("none", dirname, "cgroup", 0, aux)) {
+		pr_perror("Can't mount cgroups");
+		goto out_rd;
+	}
+
+	if (cg_move(SUBNAME))
+		goto out_rs;
+
+	status = pthread_create(&thread, NULL, thread_fn, NULL);
+	if (status) {
+		pr_err("Can't create a new thread: %s\n", strerror(status));
+		goto out_rs;
+	}
+
+	status = -1;
+	if (read(th_sync[0], &status, sizeof(status)) != sizeof(status) || status != 0) {
+		/* the failure happened in the other thread, so this thread's errno is not its cause */
+		pr_err("Error moving into cgroups\n");
+		close(rst_sync[0]);
+		goto out_rs;
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	close(rst_sync[1]);
+
+	status = -1;
+	if (read(th_sync[0], &status, sizeof(status)) < 0) {
+		pr_perror("read");
+		goto out_rs;
+	}
+	if (status != 0) {
+		fail("child cg changed");
+		goto out_rs;
+	}
+
+	pass();
+	exit_code = 0;
+
+out_rs:
+	umount(dirname);
+out_rd:
+	rmdir(dirname);
+out:
+	return exit_code;
+}
diff --git a/test/zdtm/static/cgroup_threads.desc b/test/zdtm/static/cgroup_threads.desc
new file mode 100644
index 000000000..42a3f2b73
--- /dev/null
+++ b/test/zdtm/static/cgroup_threads.desc
@@ -0,0 +1 @@
+{'flavor': 'h', 'flags': 'suid excl', 'opts': '--manage-cgroups'}
diff --git a/test/zdtm/static/cgroup_threads.hook b/test/zdtm/static/cgroup_threads.hook
new file mode 100755
index 000000000..f4b553d34
--- /dev/null
+++ b/test/zdtm/static/cgroup_threads.hook
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+set -e
+
+[ "$1" == "--clean" -o "$1" == "--pre-restore" ] || exit 0
+
+tname=$(mktemp -d cgclean.XXXXXX)
+trap 'rmdir "${tname}"' EXIT
+
+mount -t cgroup none $tname -o "none,name=zdtmtst"
+trap 'umount "${tname}"; rmdir "${tname}"' EXIT
+
+echo "Cleaning $tname"
+
+rmdir "$tname/subcg_threads/subsubcg/" || true
+rmdir "$tname/subcg_threads/" || true
+
+echo "Left there is:"
+ls "$tname"
diff --git a/test/zdtm/static/cgroup_yard.desc b/test/zdtm/static/cgroup_yard.desc
index 8736d6780..9ad4a9b57 100644
--- a/test/zdtm/static/cgroup_yard.desc
+++ b/test/zdtm/static/cgroup_yard.desc
@@ -1,6 +1,6 @@
 {
 'flavor': 'h',
-'flags': 'suid',
+'flags': 'suid excl',
 # We create the external cgroup yard in working directory during --pre-dump
 # hook. We have to go up a few directories to find the yard.
 'opts': '--manage-cgroups --cgroup-yard ../../../../../../external_yard'
diff --git a/test/zdtm/static/cgroup_yard.hook b/test/zdtm/static/cgroup_yard.hook
index d06bc45fd..b70bd59e9 100755
--- a/test/zdtm/static/cgroup_yard.hook
+++ b/test/zdtm/static/cgroup_yard.hook
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import sys
 import os
diff --git a/test/zdtm/static/cgroupns.desc b/test/zdtm/static/cgroupns.desc
index 80dd710e1..dc61e36cf 100644
--- a/test/zdtm/static/cgroupns.desc
+++ b/test/zdtm/static/cgroupns.desc
@@ -1,4 +1,4 @@
 {   'feature': 'cgroupns',
-    'flags': 'suid',
+    'flags': 'suid excl',
     'flavor': 'h',
     'opts': '--manage-cgroups'}
diff --git a/test/zdtm/static/cgroupv2_00.c b/test/zdtm/static/cgroupv2_00.c
new file mode 100644
index 000000000..2c6780e0c
--- /dev/null
+++ b/test/zdtm/static/cgroupv2_00.c
@@ -0,0 +1,86 @@
+#include <sys/mount.h>
+#include <sys/stat.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Check that some cgroup-v2 properties in kernel controllers are preserved";
+const char *test_author = "Bui Quang Minh <minhquangbui99@gmail.com>";
+
+char *dirname;
+TEST_OPTION(dirname, string, "cgroup-v2 directory name", 1);
+const char *cgname = "subcg00";
+
+int main(int argc, char **argv)
+{
+	char path[1024], aux[1024];
+	int ret = -1;
+
+	test_init(argc, argv);
+
+	if (mkdir(dirname, 0700) < 0 && errno != EEXIST) {
+		pr_perror("Can't make dir");
+		return -1;
+	}
+
+	if (mount("cgroup2", dirname, "cgroup2", 0, NULL)) {
+		pr_perror("Can't mount cgroup-v2");
+		return -1;
+	}
+
+	sprintf(path, "%s/%s", dirname, cgname);
+	if (mkdir(path, 0700) < 0 && errno != EEXIST) {
+		pr_perror("Can't make dir");
+		goto out;
+	}
+
+	/* Make cpuset controllers available in children directory */
+	sprintf(path, "%s/%s", dirname, "cgroup.subtree_control");
+	sprintf(aux, "%s", "+cpuset");
+	if (write_value(path, aux))
+		goto out;
+
+	sprintf(path, "%s/%s/%s", dirname, cgname, "cgroup.subtree_control");
+	sprintf(aux, "%s", "+cpuset");
+	if (write_value(path, aux))
+		goto out;
+
+	sprintf(path, "%s/%s/%s", dirname, cgname, "cgroup.type");
+	sprintf(aux, "%s", "threaded");
+	if (write_value(path, aux))
+		goto out;
+
+	sprintf(path, "%s/%s/%s", dirname, cgname, "cgroup.procs");
+	sprintf(aux, "%d", getpid());
+	if (write_value(path, aux))
+		goto out;
+
+	test_daemon();
+	test_waitsig();
+
+	sprintf(path, "%s/%s/%s", dirname, cgname, "cgroup.subtree_control");
+	if (read_value(path, aux, sizeof(aux)))
+		goto out;
+
+	if (strcmp(aux, "cpuset\n")) {
+		fail("cgroup.subtree_control mismatches");
+		goto out;
+	}
+
+	sprintf(path, "%s/%s/%s", dirname, cgname, "cgroup.type");
+	if (read_value(path, aux, sizeof(aux)))
+		goto out;
+
+	if (strcmp(aux, "threaded\n")) {
+		fail("cgroup.type mismatches");
+		goto out;
+	}
+
+	pass();
+
+	ret = 0;
+
+out:
+	sprintf(path, "%s", dirname);
+	umount(path);
+	return ret;
+}
diff --git a/test/zdtm/static/cgroupv2_00.checkskip b/test/zdtm/static/cgroupv2_00.checkskip
new file mode 100755
index 000000000..375ed3564
--- /dev/null
+++ b/test/zdtm/static/cgroupv2_00.checkskip
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+if [ -f /sys/fs/cgroup/cgroup.controllers ]; then
+	grep -q "cpuset" /sys/fs/cgroup/cgroup.controllers && exit 0
+fi
+
+if [ -d /sys/fs/cgroup/unified ]; then
+	grep -q "cpuset" /sys/fs/cgroup/unified/cgroup.controllers && exit 0
+fi
+
+exit 1
diff --git a/test/zdtm/static/cgroupv2_00.desc b/test/zdtm/static/cgroupv2_00.desc
new file mode 100644
index 000000000..e70c84df8
--- /dev/null
+++ b/test/zdtm/static/cgroupv2_00.desc
@@ -0,0 +1 @@
+{'flavor': 'h ns', 'flags': 'suid excl', 'opts': '--manage-cgroups=full'}
diff --git a/test/zdtm/static/cgroupv2_00.hook b/test/zdtm/static/cgroupv2_00.hook
new file mode 100755
index 000000000..1002b1ec5
--- /dev/null
+++ b/test/zdtm/static/cgroupv2_00.hook
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+[ "$1" == "--clean" -o "$1" == "--pre-restore" ] || exit 0
+
+set -e
+cgname="subcg00"
+tname=$(mktemp -d cgclean.XXXXXX)
+mount -t cgroup2 cgroup2 $tname
+
+echo "Cleaning $tname"
+echo "-cpuset" > "$tname/$cgname/cgroup.subtree_control"
+
+set +e
+rmdir "$tname/$cgname"
+umount "$tname"
+rmdir "$tname"
diff --git a/test/zdtm/static/cgroupv2_01.c b/test/zdtm/static/cgroupv2_01.c
new file mode 100644
index 000000000..f3a6d18ba
--- /dev/null
+++ b/test/zdtm/static/cgroupv2_01.c
@@ -0,0 +1,180 @@
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <pthread.h>
+#include <syscall.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Check that cgroup-v2 threaded controllers";
+const char *test_author = "Bui Quang Minh <minhquangbui99@gmail.com>";
+
+char *dirname;
+TEST_OPTION(dirname, string, "cgroup-v2 directory name", 1);
+const char *cgname = "subcg01";
+
+task_waiter_t t;
+
+#define gettid(code) syscall(__NR_gettid)
+
+void cleanup(void)
+{
+	char path[1024];
+
+	sprintf(path, "%s/%s/%s", dirname, cgname, "thread2");
+	rmdir(path);
+	sprintf(path, "%s/%s/%s", dirname, cgname, "thread1");
+	rmdir(path);
+	sprintf(path, "%s/%s", dirname, cgname);
+	rmdir(path);
+	sprintf(path, "%s", dirname);
+	umount(path);
+}
+
+int is_in_cgroup(char *cgname)
+{
+	FILE *cgf;
+	char buffer[1024];
+
+	sprintf(buffer, "/proc/self/task/%ld/cgroup", gettid());
+	cgf = fopen(buffer, "r");
+	if (cgf == NULL) {
+		pr_err("Fail to open thread's cgroup procfs\n");
+		return 0;
+	}
+
+	while (fgets(buffer, sizeof(buffer), cgf)) {
+		if (strstr(buffer, cgname)) {
+			fclose(cgf);
+			return 1;
+		}
+	}
+
+	fclose(cgf);
+	return 0;
+}
+
+void *thread_func(void *arg)
+{
+	char path[1024], aux[1024];
+
+	sprintf(path, "%s/%s/%s/%s", dirname, cgname, "thread2", "cgroup.threads");
+	sprintf(aux, "%ld", gettid());
+	if (write_value(path, aux)) {
+		cleanup();
+		exit(1);
+	}
+
+	read_value(path, aux, sizeof(aux));
+
+	task_waiter_complete(&t, 1);
+
+	/* Wait for restore */
+	task_waiter_wait4(&t, 2);
+
+	sprintf(path, "/%s/%s", cgname, "thread2");
+	if (!is_in_cgroup(path)) {
+		fail("Thread2's cgroup is not restored");
+		cleanup();
+		exit(1);
+	}
+
+	return NULL;
+}
+
+/* Create threaded cgroups thread1/thread2, put one thread in each and check membership after C/R. */
+int main(int argc, char **argv)
+{
+	char path[1024], aux[1024];
+	pthread_t thread2;
+	int ret = 1, err;
+
+	test_init(argc, argv);
+	task_waiter_init(&t);
+
+	if (mkdir(dirname, 0700) < 0 && errno != EEXIST) {
+		pr_perror("Can't make dir");
+		return -1;
+	}
+
+	if (mount("cgroup2", dirname, "cgroup2", 0, NULL)) {
+		pr_perror("Can't mount cgroup-v2");
+		return -1;
+	}
+
+	sprintf(path, "%s/%s", dirname, cgname);
+	if (mkdir(path, 0700) < 0 && errno != EEXIST) {
+		pr_perror("Can't make dir");
+		goto out;
+	}
+
+	/* Make cpuset controllers available in children directory */
+	sprintf(path, "%s/%s", dirname, "cgroup.subtree_control");
+	sprintf(aux, "%s", "+cpuset");
+	if (write_value(path, aux))
+		goto out;
+
+	sprintf(path, "%s/%s/%s", dirname, cgname, "cgroup.subtree_control");
+	sprintf(aux, "%s", "+cpuset");
+	if (write_value(path, aux))
+		goto out;
+
+	sprintf(path, "%s/%s/%s", dirname, cgname, "cgroup.procs");
+	sprintf(aux, "%d", getpid());
+	if (write_value(path, aux))
+		goto out;
+
+	sprintf(path, "%s/%s/%s", dirname, cgname, "thread1");
+	if (mkdir(path, 0700) < 0 && errno != EEXIST) {
+		pr_perror("Can't make dir");
+		goto out;
+	}
+
+	sprintf(path, "%s/%s/%s/%s", dirname, cgname, "thread1", "cgroup.type");
+	sprintf(aux, "%s", "threaded");
+	if (write_value(path, aux))
+		goto out;
+
+	sprintf(path, "%s/%s/%s", dirname, cgname, "thread2");
+	if (mkdir(path, 0700) < 0 && errno != EEXIST) {
+		pr_perror("Can't make dir");
+		goto out;
+	}
+
+	sprintf(path, "%s/%s/%s/%s", dirname, cgname, "thread2", "cgroup.type");
+	sprintf(aux, "%s", "threaded");
+	if (write_value(path, aux))
+		goto out;
+
+	/* pthread_create() returns a positive error number; keep it out of ret so ret stays 1 until pass() */
+	err = pthread_create(&thread2, NULL, thread_func, NULL);
+	if (err) {
+		pr_err("pthread_create %s\n", strerror(err));
+		goto out;
+	}
+
+	sprintf(path, "%s/%s/%s/%s", dirname, cgname, "thread1", "cgroup.threads");
+	sprintf(aux, "%ld", gettid());
+	if (write_value(path, aux))
+		goto out;
+
+	task_waiter_wait4(&t, 1);
+
+	test_daemon();
+	test_waitsig();
+
+	task_waiter_complete(&t, 2);
+
+	sprintf(path, "/%s/%s", cgname, "thread1");
+	if (!is_in_cgroup(path)) {
+		fail("Main thread's cgroup is not restored");
+		cleanup();
+		exit(1);
+	}
+	pthread_join(thread2, NULL);
+	pass();
+	ret = 0;
+
+out:
+	cleanup();
+	return ret;
+}
diff --git a/test/zdtm/static/cgroupv2_01.checkskip b/test/zdtm/static/cgroupv2_01.checkskip
new file mode 100755
index 000000000..375ed3564
--- /dev/null
+++ b/test/zdtm/static/cgroupv2_01.checkskip
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+if [ -f /sys/fs/cgroup/cgroup.controllers ]; then
+	grep -q "cpuset" /sys/fs/cgroup/cgroup.controllers && exit 0
+fi
+
+if [ -d /sys/fs/cgroup/unified ]; then
+	grep -q "cpuset" /sys/fs/cgroup/unified/cgroup.controllers && exit 0
+fi
+
+exit 1
diff --git a/test/zdtm/static/cgroupv2_01.desc b/test/zdtm/static/cgroupv2_01.desc
new file mode 100644
index 000000000..e70c84df8
--- /dev/null
+++ b/test/zdtm/static/cgroupv2_01.desc
@@ -0,0 +1 @@
+{'flavor': 'h ns', 'flags': 'suid excl', 'opts': '--manage-cgroups=full'}
diff --git a/test/zdtm/static/cgroupv2_01.hook b/test/zdtm/static/cgroupv2_01.hook
new file mode 100755
index 000000000..2263fd014
--- /dev/null
+++ b/test/zdtm/static/cgroupv2_01.hook
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+[ "$1" == "--clean" -o "$1" == "--pre-restore" ] || exit 0
+
+set -e
+cgname="subcg01"
+tname=$(mktemp -d cgclean.XXXXXX)
+mount -t cgroup2 cgroup2 $tname
+
+echo "Cleaning $tname"
+
+set +e
+rmdir "$tname/$cgname/thread1"
+
+# When the test finishes, the cleanup() function removes this directory
+# successfully because the thread in this cgroup has exited and no other
+# threads belong to it.
+if [ "$1" == "--pre-restore" ]; then
+	rmdir "$tname/$cgname/thread2"
+fi
+
+rmdir "$tname/$cgname"
+umount "$tname"
+rmdir "$tname"
diff --git a/test/zdtm/static/change_mnt_context.c b/test/zdtm/static/change_mnt_context.c
index 6d436014b..8787ae5cf 100644
--- a/test/zdtm/static/change_mnt_context.c
+++ b/test/zdtm/static/change_mnt_context.c
@@ -46,7 +46,7 @@ int main(int argc, char **argv)
 		if (!pos)
 			continue;
 
-		result = sscanf(pos, " - %*s %*s %s", opts);
+		result = sscanf(pos, " - %*s %*s %1023s", opts);
 		if (result != 1) {
 			fail("Not able to sscanf line from mountinfo");
 			goto out;
diff --git a/test/zdtm/static/child_opened_proc.c b/test/zdtm/static/child_opened_proc.c
index 2125cd264..cfe04fa4b 100644
--- a/test/zdtm/static/child_opened_proc.c
+++ b/test/zdtm/static/child_opened_proc.c
@@ -10,7 +10,7 @@
 #include "zdtmtst.h"
 
 const char *test_doc = "Check that tree prior to files opening";
-const char *test_author = "Stanislav Kinsbursky <skinsbursky@paralles.com";
+const char *test_author = "Stanislav Kinsbursky <skinsbursky@parallels.com>";
 
 int main(int argc, char **argv)
 {
diff --git a/test/zdtm/static/cow00.c b/test/zdtm/static/cow00.c
index cb0c6733e..456b6a7b4 100644
--- a/test/zdtm/static/cow00.c
+++ b/test/zdtm/static/cow00.c
@@ -29,7 +29,7 @@ static int is_cow(void *addr, pid_t p1, pid_t p2)
 
 	snprintf(buf, sizeof(buf), "/proc/%d/pagemap", p2);
 	fd2 = open(buf, O_RDONLY);
-	if (fd1 < 0) {
+	if (fd2 < 0) {
 		pr_perror("Unable to open file %s", buf);
 		return -1;
 	}
diff --git a/test/zdtm/static/fanotify00.c b/test/zdtm/static/fanotify00.c
index 69ead43e7..0400cc74b 100644
--- a/test/zdtm/static/fanotify00.c
+++ b/test/zdtm/static/fanotify00.c
@@ -22,7 +22,7 @@
 #elif defined(__PPC64__)
 #define __NR_fanotify_init 323
 #define __NR_fanotify_mark 324
-#elif __aarch64__
+#elif (__aarch64__ || __riscv)
 #define __NR_fanotify_init 262
 #define __NR_fanotify_mark 263
 #elif __s390x__
diff --git a/test/zdtm/static/fd_from_pidfd.c b/test/zdtm/static/fd_from_pidfd.c
new file mode 100644
index 000000000..1f863d6c0
--- /dev/null
+++ b/test/zdtm/static/fd_from_pidfd.c
@@ -0,0 +1,108 @@
+#include <sys/syscall.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Check if fd obtained from pidfd_get_fd is C/R correctly\n";
+const char *test_author = "Bhavik Sachdev <b.sachdev1904@gmail.com>";
+
+static int pidfd_open(pid_t pid, unsigned int flags)
+{
+	return syscall(__NR_pidfd_open, pid, flags);
+}
+
+static int pidfd_getfd(int pidfd, int targetfd, unsigned int flags)
+{
+	return syscall(__NR_pidfd_getfd, pidfd, targetfd, flags);
+}
+
+static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags)
+{
+	return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
+}
+
+int main(int argc, char* argv[])
+{
+	#define READ 0
+	#define WRITE 1
+
+	int pidfd, child, p[2], child_read, read_data, status;
+	int data = 42;
+
+	test_init(argc, argv);
+
+	if (pipe(p)) {
+		pr_perror("pipe");
+		return 1;
+	}
+
+	child = fork();
+	if (child < 0) {
+		pr_perror("fork");
+		return 1;
+	}
+
+	if (child == 0) {
+		close(p[WRITE]);
+		test_waitsig();
+		return 0;
+	}
+
+	pidfd = pidfd_open(child, 0);
+	if (pidfd < 0) {
+		pr_perror("pidfd_open failed");
+		return 1;
+	}
+
+	close(p[READ]);
+	if (write(p[WRITE], &data, sizeof(data)) != sizeof(data)) {
+		pr_perror("write");
+		return 1;
+	}
+	close(p[WRITE]);
+
+	child_read = pidfd_getfd(pidfd, p[READ], 0);
+	if (child_read < 0) {
+		pr_perror("pidfd_getfd");
+		return 1;
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	if (read(child_read, &read_data, sizeof(read_data)) != sizeof(read_data)) {
+		pr_perror("read");
+		goto err_close;
+	}
+
+	if (read_data != data) {
+		fail("data from fd obtained using pidfd_getfd incorrect");
+		goto err_close;
+	}
+
+	if (pidfd_send_signal(pidfd, SIGTERM, NULL, 0)) {
+		pr_perror("Could not send signal");
+		goto err_close;
+	}
+
+	if (waitpid(child, &status, 0) != child) {
+		pr_perror("waitpid()");
+		return 1;
+	}
+
+	if (status != 0) {
+		fail("%d:%d:%d:%d", WIFEXITED(status), WEXITSTATUS(status), WIFSIGNALED(status), WTERMSIG(status));
+		return 1;
+	}
+
+	pass();
+	close(child_read);
+	close(pidfd);
+	return 0;
+err_close:
+	close(child_read);
+	close(pidfd);
+	return 1;
+}
diff --git a/test/zdtm/static/fd_offset.c b/test/zdtm/static/fd_offset.c
new file mode 100644
index 000000000..96255a4a1
--- /dev/null
+++ b/test/zdtm/static/fd_offset.c
@@ -0,0 +1,42 @@
+#include <fcntl.h>
+
+#include "zdtmtst.h"
+#include "lock.h"
+
+const char *test_doc = "Check that criu properly restores offsets on ELF files";
+const char *test_author = "Michal Clapinski <mclapinski@google.com>";
+
+void check_offset(int fd)
+{
+	int offset = lseek(fd, 0, SEEK_CUR);
+	if (offset < 0) {
+		fail("lseek");
+		exit(1);
+	}
+	if (offset != 0) {
+		fail("wrong offset; expected: 0, got: %d", offset);
+		exit(1);
+	}
+}
+
+int main(int argc, char **argv)
+{
+	int fd;
+
+	test_init(argc, argv);
+
+	fd = open("/proc/self/exe", O_RDONLY);
+	if (fd < 0) {
+		fail("open");
+		exit(1);
+	}
+	check_offset(fd);
+
+	test_daemon();
+	test_waitsig();
+
+	check_offset(fd);
+
+	pass();
+	return 0;
+}
diff --git a/test/zdtm/static/file_locks01.c b/test/zdtm/static/file_locks01.c
index beea171f5..bfdca51d9 100644
--- a/test/zdtm/static/file_locks01.c
+++ b/test/zdtm/static/file_locks01.c
@@ -107,7 +107,7 @@ static int check_file_lock(int fd, char *expected_type, char *expected_option, u
 		memset(fl_type, 0, sizeof(fl_type));
 		memset(fl_option, 0, sizeof(fl_option));
 
-		num = sscanf(buf, "%*s %*d:%s %s %s %d %x:%x:%ld %*d %*s", fl_flag, fl_type, fl_option, &fl_owner, &maj,
+		num = sscanf(buf, "%*s %*d:%15s %15s %15s %d %x:%x:%ld %*d %*s", fl_flag, fl_type, fl_option, &fl_owner, &maj,
 			     &min, &i_no);
 		if (num < 7) {
 			pr_err("Invalid lock info\n");
diff --git a/test/zdtm/static/file_locks02.c b/test/zdtm/static/file_locks02.c
index d2049ebaa..ae4827de9 100644
--- a/test/zdtm/static/file_locks02.c
+++ b/test/zdtm/static/file_locks02.c
@@ -41,7 +41,7 @@ static int check_file_lock(pid_t pid, pid_t child, int fd, char *expected_type,
 		memset(fl_type, 0, sizeof(fl_type));
 		memset(fl_option, 0, sizeof(fl_option));
 
-		num = sscanf(buf, "%*s %*d:%s %s %s %d", fl_flag, fl_type, fl_option, &fl_owner);
+		num = sscanf(buf, "%*s %*d:%15s %15s %15s %d", fl_flag, fl_type, fl_option, &fl_owner);
 		if (num < 4) {
 			pr_perror("Invalid lock info.");
 			break;
diff --git a/test/zdtm/static/file_locks03.c b/test/zdtm/static/file_locks03.c
index 35ef41a21..228e66892 100644
--- a/test/zdtm/static/file_locks03.c
+++ b/test/zdtm/static/file_locks03.c
@@ -41,7 +41,7 @@ static int check_file_lock(pid_t pid, pid_t child, int fd, char *expected_type,
 		memset(fl_type, 0, sizeof(fl_type));
 		memset(fl_option, 0, sizeof(fl_option));
 
-		num = sscanf(buf, "%*s %*d:%s %s %s %d", fl_flag, fl_type, fl_option, &fl_owner);
+		num = sscanf(buf, "%*s %*d:%15s %15s %15s %d", fl_flag, fl_type, fl_option, &fl_owner);
 		if (num < 4) {
 			pr_perror("Invalid lock info.");
 			break;
diff --git a/test/zdtm/static/file_locks04.c b/test/zdtm/static/file_locks04.c
index 11d224fa7..7e0d2654e 100644
--- a/test/zdtm/static/file_locks04.c
+++ b/test/zdtm/static/file_locks04.c
@@ -34,7 +34,7 @@ static int check_file_locks(pid_t child_pid, int fd, int child_fd)
 			continue;
 		test_msg("c: %s", buf);
 
-		num = sscanf(buf, "%*s %*d:%s %s %s %d %*02x:%*02x:%*d %*d %*s", fl_flag, fl_type, fl_option,
+		num = sscanf(buf, "%*s %*d:%15s %15s %15s %d %*02x:%*02x:%*d %*d %*s", fl_flag, fl_type, fl_option,
 			     &fl_owner);
 
 		if (num < 4) {
diff --git a/test/zdtm/static/file_locks06.checkskip b/test/zdtm/static/file_locks06.checkskip
index 06ab58521..c5039a2d2 100755
--- a/test/zdtm/static/file_locks06.checkskip
+++ b/test/zdtm/static/file_locks06.checkskip
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 import fcntl
 import tempfile
 import struct
diff --git a/test/zdtm/static/get_smaps_bits.c b/test/zdtm/static/get_smaps_bits.c
index 31d0d92b2..3d952ac95 100644
--- a/test/zdtm/static/get_smaps_bits.c
+++ b/test/zdtm/static/get_smaps_bits.c
@@ -6,6 +6,10 @@
 #define MAP_HUGETLB 0x40000
 #endif
 
+#ifndef MAP_DROPPABLE
+#define MAP_DROPPABLE 0x08
+#endif
+
 #ifndef MADV_HUGEPAGE
 #define MADV_HUGEPAGE 14
 #endif
@@ -18,6 +22,10 @@
 #define MADV_DONTDUMP 16
 #endif
 
+#ifndef MADV_WIPEONFORK
+#define MADV_WIPEONFORK 18
+#endif
+
 static void parse_vmflags(char *buf, unsigned long *flags, unsigned long *madv)
 {
 	char *tok;
@@ -41,6 +49,8 @@ static void parse_vmflags(char *buf, unsigned long *flags, unsigned long *madv)
 			*flags |= MAP_NORESERVE;
 		else if (_vmflag_match(tok, "ht"))
 			*flags |= MAP_HUGETLB;
+		else if (_vmflag_match(tok, "dp"))
+			*flags |= MAP_DROPPABLE;
 
 		/* madvise() block */
 		if (_vmflag_match(tok, "sr"))
@@ -57,6 +67,8 @@ static void parse_vmflags(char *buf, unsigned long *flags, unsigned long *madv)
 			*madv |= (1ul << MADV_HUGEPAGE);
 		else if (_vmflag_match(tok, "nh"))
 			*madv |= (1ul << MADV_NOHUGEPAGE);
+		else if (_vmflag_match(tok, "wf"))
+			*madv |= (1ul << MADV_WIPEONFORK);
 
 		/*
 		 * Anything else is just ignored.
diff --git a/test/zdtm/static/ghost_holes_large00.c b/test/zdtm/static/ghost_holes_large00.c
new file mode 100644
index 000000000..1a9739f8e
--- /dev/null
+++ b/test/zdtm/static/ghost_holes_large00.c
@@ -0,0 +1,152 @@
+#include <errno.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <string.h>
+#include <linux/limits.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Test ghost with one large hole(1GiB) in the middle";
+const char *test_author = "Liang-Chun Chen <featherclc@gmail.com>";
+
+char *filename;
+TEST_OPTION(filename, string, "file name", 1);
+
+/* I/O buffer; each data chunk is BUFSIZE bytes (1 MiB when LIMIT is defined, else 4 KiB) */
+#ifdef LIMIT
+#define BUFSIZE (1024 * 1024)
+#else
+#define BUFSIZE 4096
+#endif
+static unsigned char buf[BUFSIZE];
+
+#ifndef SEEK_DATA
+#define SEEK_DATA 3
+#define SEEK_HOLE 4
+#endif
+
+#define DATA1_OFF 0
+#define HOLE_SIZE (1LL * 1 * 1024 * 1024 * 1024)
+#define DATA2_OFF (BUFSIZE + HOLE_SIZE)
+#define FILE_SIZE (2 * BUFSIZE + HOLE_SIZE)
+#define ST_UNIT	  512
+
+int main(int argc, char **argv)
+{
+	int fd;
+	struct stat st;
+	uint32_t crc;
+	bool chk_hole = true;
+
+	test_init(argc, argv);
+
+	fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0644);
+	if (fd < 0) {
+		pr_perror("can't open %s", filename);
+		exit(1);
+	}
+
+	if (unlink(filename) < 0) {
+		pr_perror("can't unlink %s", filename);
+		goto failed;
+	}
+
+	crc = ~0;
+	datagen(buf, BUFSIZE, &crc);
+	if (pwrite(fd, buf, BUFSIZE, DATA1_OFF) != BUFSIZE) {
+		pr_perror("can't write data1");
+		goto failed;
+	}
+
+	crc = ~0;
+	datagen(buf, BUFSIZE, &crc);
+	if (pwrite(fd, buf, BUFSIZE, DATA2_OFF) != BUFSIZE) {
+		pr_perror("can't write data2");
+		goto failed;
+	}
+
+	if (ftruncate(fd, FILE_SIZE)) {
+		pr_perror("Can't fixup file size");
+		goto failed;
+	}
+
+	if (lseek(fd, DATA1_OFF, SEEK_HOLE) != DATA1_OFF + BUFSIZE) {
+		test_msg("Won't check for hole\n");
+		chk_hole = false;
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	if (fstat(fd, &st) < 0) {
+		fail("can't stat after");
+		goto failed;
+	}
+
+	if (st.st_size != FILE_SIZE) {
+		fail("file size changed to %ld", (long)st.st_size);
+		goto failed;
+	}
+
+	test_msg("file size OK\n");
+
+	if (st.st_blocks * ST_UNIT != 2 * BUFSIZE) {
+		fail("actual file size changed to %ld", (long)st.st_blocks * ST_UNIT);
+		goto failed;
+	}
+
+	test_msg("actual file size OK\n");
+
+	/* Data 1 */
+	if (pread(fd, buf, BUFSIZE, DATA1_OFF) != BUFSIZE) {
+		fail("pread1 fail");
+		goto failed;
+	}
+
+	crc = ~0;
+	if (datachk(buf, BUFSIZE, &crc)) {
+		fail("datachk1 fail");
+		goto failed;
+	}
+
+	test_msg("Data1 OK\n");
+
+	/* Data 2 */
+	if (pread(fd, buf, BUFSIZE, DATA2_OFF) != BUFSIZE) {
+		fail("pread2 fail");
+		goto failed;
+	}
+
+	crc = ~0;
+	if (datachk(buf, BUFSIZE, &crc)) {
+		fail("datachk2 fail");
+		goto failed;
+	}
+
+	test_msg("Data2 OK\n");
+
+	/* Hole */
+	if (chk_hole) {
+		if (lseek(fd, DATA1_OFF, SEEK_HOLE) != DATA1_OFF + BUFSIZE) {
+			fail("Begin of mid hole not found");
+			goto failed;
+		}
+		if (lseek(fd, DATA1_OFF + BUFSIZE, SEEK_DATA) != DATA2_OFF) {
+			fail("End of mid hole not found");
+			goto failed;
+		}
+		test_msg("Mid hole OK\n");
+	}
+
+	close(fd);
+	pass();
+	return 0;
+
+failed:
+	close(fd);
+	return 1;
+}
diff --git a/test/zdtm/static/ghost_holes_large01.c b/test/zdtm/static/ghost_holes_large01.c
new file mode 120000
index 000000000..1b90363d4
--- /dev/null
+++ b/test/zdtm/static/ghost_holes_large01.c
@@ -0,0 +1 @@
+ghost_holes_large00.c
\ No newline at end of file
diff --git a/test/zdtm/static/ghost_holes_large01.desc b/test/zdtm/static/ghost_holes_large01.desc
new file mode 100644
index 000000000..8e6a476bd
--- /dev/null
+++ b/test/zdtm/static/ghost_holes_large01.desc
@@ -0,0 +1 @@
+{'flags': 'crfail'}
\ No newline at end of file
diff --git a/test/zdtm/static/ghost_multi_hole00.c b/test/zdtm/static/ghost_multi_hole00.c
new file mode 100644
index 000000000..0f78d4f14
--- /dev/null
+++ b/test/zdtm/static/ghost_multi_hole00.c
@@ -0,0 +1,122 @@
+#include <errno.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <string.h>
+#include <linux/limits.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Test ghost with a lot of holes(every 8K length contains only 4K data)";
+const char *test_author = "Liang-Chun Chen <featherclc@gmail.com>";
+
+char *filename;
+TEST_OPTION(filename, string, "file name", 1);
+
+/* Buffer that is suitable for hole size */
+#define BUFSIZE 4096
+static unsigned char buf4k[BUFSIZE];
+
+#ifndef SEEK_DATA
+#define SEEK_DATA 3
+#define SEEK_HOLE 4
+#endif
+
+#define FILE_SIZE (1 << 23) /* 8Mb */
+
+#define FILE_INTERVAL (1 << 13) /* 8Kb */
+
+/* Build an unlinked file of 4K data + 4K hole per 8K; re-check size, data and holes after test_waitsig(). */
+int main(int argc, char **argv)
+{
+	int fd, off;
+	struct stat st;
+	uint32_t crc;
+
+	test_init(argc, argv);
+
+	fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0644);
+	if (fd < 0) {
+		pr_perror("can't open %s", filename);
+		exit(1);
+	}
+
+	if (unlink(filename) < 0) {
+		pr_perror("can't unlink %s", filename);
+		goto failed;
+	}
+
+	for (off = 0; off < FILE_SIZE; off += FILE_INTERVAL) {
+		crc = ~0;
+		datagen(buf4k, BUFSIZE, &crc);
+		if (pwrite(fd, buf4k, BUFSIZE, off) != BUFSIZE) {
+			pr_perror("pwrite");
+			goto failed;
+		}
+
+		/*
+		 * On some file systems, such as xfs, pwrite() alone might not create
+		 * a highly sparse file, so forcibly punch a hole after each data chunk.
+		 */
+		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off + BUFSIZE, BUFSIZE)) {
+			pr_perror("fallocate");
+			goto failed;
+		}
+	}
+
+	if (ftruncate(fd, FILE_SIZE)) {
+		pr_perror("Can't fixup file size");
+		goto failed;
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	if (fstat(fd, &st) < 0) {
+		fail("can't stat after");
+		goto failed;
+	}
+
+	if (st.st_size != FILE_SIZE) {
+		fail("file size changed to %ld", (long)st.st_size);
+		goto failed;
+	}
+
+	test_msg("Size %d OK\n", FILE_SIZE);
+
+	/* Data: the first 4K of every 8K interval must still match the generated pattern */
+	for (off = 0; off < FILE_SIZE; off += FILE_INTERVAL) {
+		if (pread(fd, buf4k, BUFSIZE, off) != BUFSIZE) {
+			fail("pread failed @ %d", off / FILE_INTERVAL);
+			goto failed;
+		}
+
+		crc = ~0;
+		if (datachk(buf4k, BUFSIZE, &crc)) {
+			fail("datachk failed @ %d", off / FILE_INTERVAL);
+			goto failed;
+		}
+
+		test_msg("Data @%d OK\n", off / FILE_INTERVAL);
+	}
+
+	/* Hole: the first hole at or after each interval start must begin right after its data */
+	for (off = 0; off < FILE_SIZE; off += FILE_INTERVAL) {
+		if (lseek(fd, off, SEEK_HOLE) != off + BUFSIZE) {
+			fail("failed to find hole @ %d", off / FILE_INTERVAL);
+			goto failed;
+		}
+		test_msg("Hole @%d OK\n", off / FILE_INTERVAL);
+	}
+
+	close(fd);
+	pass();
+	return 0;
+
+failed:
+	close(fd);
+	return 1;
+}
diff --git a/test/zdtm/static/ghost_multi_hole00.desc b/test/zdtm/static/ghost_multi_hole00.desc
new file mode 100644
index 000000000..3981e8180
--- /dev/null
+++ b/test/zdtm/static/ghost_multi_hole00.desc
@@ -0,0 +1 @@
+{'dopts': '--ghost-limit 8M --no-ghost-fiemap'}
diff --git a/test/zdtm/static/ghost_multi_hole01.c b/test/zdtm/static/ghost_multi_hole01.c
new file mode 120000
index 000000000..c75006a6b
--- /dev/null
+++ b/test/zdtm/static/ghost_multi_hole01.c
@@ -0,0 +1 @@
+ghost_multi_hole00.c
\ No newline at end of file
diff --git a/test/zdtm/static/ghost_multi_hole01.desc b/test/zdtm/static/ghost_multi_hole01.desc
new file mode 100644
index 000000000..d1dc68a54
--- /dev/null
+++ b/test/zdtm/static/ghost_multi_hole01.desc
@@ -0,0 +1 @@
+{'dopts': '--ghost-limit 8M --ghost-fiemap'}
diff --git a/test/zdtm/static/macvlan.checkskip b/test/zdtm/static/macvlan.checkskip
new file mode 100755
index 000000000..f4e060953
--- /dev/null
+++ b/test/zdtm/static/macvlan.checkskip
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+FAIL=0
+
+create_macvlan_device() {
+	# Reset state left by an earlier attempt so a retry is judged on its own.
+	FAIL=0
+	ip link del test_mvlan1 >/dev/null 2>&1
+	if ! ip link add test_mvlan1 type veth >/dev/null 2>&1 ||
+	   ! ip link add mymacvlan1 link test_mvlan1 type macvlan >/dev/null 2>&1; then
+		FAIL=1
+	fi
+	return "${FAIL}"
+}
+
+cleanup() {
+	ip link del test_mvlan1 >/dev/null 2>&1
+	ip link del mymacvlan1 >/dev/null 2>&1
+}
+
+trap "cleanup" QUIT TERM INT HUP EXIT
+
+# Test once without loading the module
+if create_macvlan_device; then
+	exit 0
+fi
+
+# Test once more with explicitly loading the module
+if ! modprobe macvlan >/dev/null 2>&1; then
+	exit 1
+fi
+create_macvlan_device
+
+if [ "${FAIL}" == "1" ]; then
+	exit 1
+fi
+
+exit 0
diff --git a/test/zdtm/static/maps00.c b/test/zdtm/static/maps00.c
index b1e55e861..f6989f3af 100644
--- a/test/zdtm/static/maps00.c
+++ b/test/zdtm/static/maps00.c
@@ -137,7 +137,7 @@ static int check_map(struct map *map)
 			}
 		/* prot |= PROT_READ//	need barrier before this line,
 					because compiler change order commands.
-					I finded one method: look at next lines*/
+					I found one method: look at next lines*/
 	} else
 		prot &= PROT_WRITE | !PROT_READ | PROT_EXEC;
 
diff --git a/test/zdtm/static/maps02.c b/test/zdtm/static/maps02.c
index 29f1372c9..38244f020 100644
--- a/test/zdtm/static/maps02.c
+++ b/test/zdtm/static/maps02.c
@@ -2,11 +2,19 @@
 #include "zdtmtst.h"
 #include "get_smaps_bits.h"
 
+#ifndef MAP_DROPPABLE
+#define MAP_DROPPABLE 0x08
+#endif
+
 #ifndef MADV_DONTDUMP
 #define MADV_DONTDUMP 16
 #endif
 
-const char *test_doc = "Test shared memory with advises";
+#ifndef MADV_WIPEONFORK
+#define MADV_WIPEONFORK 18
+#endif
+
+const char *test_doc = "Test private memory with advises";
 const char *test_author = "Cyrill Gorcunov <gorcunov@openvz.org>";
 
 struct mmap_data {
@@ -23,8 +31,14 @@ static int alloc_anon_mmap(struct mmap_data *m, int flags, int adv)
 {
 	m->start = mmap(NULL, MEM_SIZE, PROT_READ | PROT_WRITE, flags, -1, 0);
 	if (m->start == MAP_FAILED) {
-		pr_perror("mmap failed");
-		return -1;
+		if (errno == EINVAL) {
+			test_msg("mmap failed, no kernel support\n");
+			*m = (struct mmap_data){};
+			return 0;
+		} else {
+			pr_perror("mmap failed");
+			return -1;
+		}
 	}
 
 	if (madvise(m->start, MEM_SIZE, adv)) {
@@ -43,12 +57,12 @@ static int alloc_anon_mmap(struct mmap_data *m, int flags, int adv)
 
 int main(int argc, char **argv)
 {
-	struct mmap_data m[5] = {};
+	struct mmap_data m[7] = {};
 	size_t i;
 
 	test_init(argc, argv);
 
-	test_msg("Alloc growsdown\n");
+	test_msg("Alloc dontfork\n");
 	if (alloc_anon_mmap(&m[0], MAP_PRIVATE | MAP_ANONYMOUS, MADV_DONTFORK))
 		return -1;
 
@@ -64,10 +78,18 @@ int main(int argc, char **argv)
 	if (alloc_anon_mmap(&m[3], MAP_PRIVATE | MAP_ANONYMOUS, MADV_HUGEPAGE))
 		return -1;
 
-	test_msg("Alloc dontfork/random|mergeable\n");
+	test_msg("Alloc mergeable\n");
 	if (alloc_anon_mmap(&m[4], MAP_PRIVATE | MAP_ANONYMOUS, MADV_MERGEABLE))
 		return -1;
 
+	test_msg("Alloc wipeonfork\n");
+	if (alloc_anon_mmap(&m[5], MAP_PRIVATE | MAP_ANONYMOUS, MADV_WIPEONFORK))
+		return -1;
+
+	test_msg("Alloc droppable\n");
+	if (alloc_anon_mmap(&m[6], MAP_DROPPABLE | MAP_ANONYMOUS, MADV_NORMAL))
+		return -1;
+
 	test_msg("Fetch existing flags/adv\n");
 	for (i = 0; i < sizeof(m) / sizeof(m[0]); i++) {
 		if (get_smaps_bits((unsigned long)m[i].start, &m[i].orig_flags, &m[i].orig_madv))
diff --git a/test/zdtm/static/maps11.c b/test/zdtm/static/maps11.c
new file mode 100644
index 000000000..df309714b
--- /dev/null
+++ b/test/zdtm/static/maps11.c
@@ -0,0 +1,205 @@
+#include <stdint.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "zdtmtst.h"
+
+#ifndef MAP_DROPPABLE
+#define MAP_DROPPABLE 0x08
+#endif
+
+#ifndef MADV_WIPEONFORK
+#define MADV_WIPEONFORK 18
+#endif
+
+const char *test_doc = "Test MAP_DROPPABLE/MADV_WIPEONFORK mappings with 2 processes";
+const char *test_author = "Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>";
+
+bool mem_is_zero(const uint8_t *buffer, size_t length)
+{
+	const uint8_t *end = buffer + length;
+
+	/* Scan byte by byte; the first non-zero byte settles the answer. */
+	while (buffer < end)
+		if (*buffer++ != 0)
+			return false;
+	return true;
+}
+
+int main(int argc, char **argv)
+{
+	uint8_t *p1, *p2;
+	pid_t pid;
+	int status;
+	const char data[] = "MADV_WIPEONFORK vma data";
+	bool criu_was_there = false;
+	struct stat st1, st2;
+
+	test_init(argc, argv);
+
+	p1 = mmap(NULL, sizeof(data), PROT_READ | PROT_WRITE,
+		  MAP_DROPPABLE | MAP_ANONYMOUS, -1, 0);
+	if (p1 == MAP_FAILED) {
+		if (errno == EINVAL) {
+			skip("mmap failed, no kernel support for MAP_DROPPABLE\n");
+			goto skip;
+		} else {
+			pr_perror("mmap failed");
+			return -1;
+		}
+	}
+
+	p2 = mmap(NULL, sizeof(data), PROT_READ | PROT_WRITE,
+		  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (p2 == MAP_FAILED) {
+		pr_perror("mmap failed");
+		return 1;
+	}
+
+	if (madvise(p2, sizeof(data), MADV_WIPEONFORK)) {
+		pr_perror("madvise failed");
+		return -1;
+	}
+
+	/* contents of this mapping are supposed to be dropped after C/R */
+	memcpy(p1, data, sizeof(data));
+
+	/* contents of this mapping are supposed to be dropped after fork() */
+	memcpy(p2, data, sizeof(data));
+
+	/*
+	 * Let's spawn a process before C/R so that our mappings get inherited;
+	 * then, after C/R, we need to ensure that the CRIU memory premapping
+	 * machinery works properly.
+	 *
+	 * This is important because we restore MADV_WIPEONFORK at a later
+	 * stage (after vma premapping happens) and we need to ensure that
+	 * CRIU handles everything correctly.
+	 */
+	pid = test_fork();
+	if (pid < 0) {
+		pr_perror("fork failed");
+		return 1;
+	}
+
+	if (pid == 0) {
+		test_waitsig();
+
+		/*
+		 * Both mappings have the VM_WIPEONFORK flag set,
+		 * so we expect them to be zeroed after fork().
+		 */
+		if (!mem_is_zero(p1, sizeof(data)) ||
+		    !mem_is_zero(p2, sizeof(data))) {
+			pr_err("1st child: memory check failed\n");
+			return 1;
+		}
+
+		return 0;
+	}
+
+	/*
+	 * A simple way to detect if C/R happened is to compare st_ino
+	 * fields of stat() on the procfs files of the current task.
+	 *
+	 * Hopefully, this terrible hack is never used in real-world
+	 * applications ;-) Here, we only need this to make the test
+	 * pass with/without the --nocr option.
+	 */
+	if (stat("/proc/self/status", &st1)) {
+		pr_perror("stat");
+		goto err; /* the 1st child is already running: reap it instead of leaking it */
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	/* signal a child process to continue */
+	if (kill(pid, SIGTERM)) {
+		pr_perror("kill");
+		goto err;
+	}
+
+	if (waitpid(pid, &status, 0) != pid) {
+		pr_perror("1st waitpid");
+		goto err;
+	}
+
+	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+		fail("1st process didn't exit cleanly: status=%d", status);
+		goto err;
+	}
+
+	if (stat("/proc/self/status", &st2)) {
+		pr_perror("stat");
+		return 1;
+	}
+
+	/* detect CRIU */
+	criu_was_there = st1.st_ino != st2.st_ino;
+
+	/*
+	 * We should mark failure if one of the following happens:
+	 * 1. MAP_DROPPABLE memory is not zero after C/R
+	 * 2. MAP_DROPPABLE memory somehow changed without C/R
+	 *    (kernel issue? memory pressure?)
+	 * 3. MADV_WIPEONFORK memory is not preserved
+	 *
+	 * We care about the 2nd case only because we would like the test
+	 * to pass even with the --nocr zdtm.py option.
+	 */
+	if ((criu_was_there && !mem_is_zero(p1, sizeof(data))) ||
+	    (!criu_was_there && memcmp(p1, data, sizeof(data))) ||
+	    memcmp(p2, data, sizeof(data))) {
+		fail("Data mismatch");
+		return 1;
+	}
+
+	/* contents of these mappings are supposed to be dropped after fork() */
+	memcpy(p1, data, sizeof(data));
+	memcpy(p2, data, sizeof(data));
+
+	pid = test_fork();
+	if (pid < 0) {
+		pr_perror("fork failed");
+		return 1;
+	}
+
+	if (pid == 0) {
+		if (!mem_is_zero(p1, sizeof(data)) ||
+		    !mem_is_zero(p2, sizeof(data))) {
+			pr_err("2nd child: memory check failed\n");
+			return 1;
+		}
+
+		return 0;
+	}
+
+	if (waitpid(pid, &status, 0) != pid) {
+		pr_perror("2nd waitpid");
+		goto err;
+	}
+
+	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+		fail("2nd process didn't exit cleanly: status=%d", status);
+		goto err;
+	}
+
+	pass();
+
+	return 0;
+err:
+	if (waitpid(-1, NULL, WNOHANG) == 0) {
+		kill(pid, SIGTERM);
+		wait(NULL);
+	}
+	return 1;
+
+skip:
+	test_daemon();
+	test_waitsig();
+	pass();
+	return 0;
+}
diff --git a/test/zdtm/static/maps12.c b/test/zdtm/static/maps12.c
new file mode 100644
index 000000000..f0d6c2381
--- /dev/null
+++ b/test/zdtm/static/maps12.c
@@ -0,0 +1,351 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <linux/limits.h>
+#include "zdtmtst.h"
+
+const char *test_doc = "Test madvise(MADV_GUARD_INSTALL)";
+const char *test_author = "Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>";
+/* some parts of code were taken from Linux kernel's kselftest guard-pages.c
+   written by Lorenzo Stoakes <lorenzo.stoakes@oracle.com> */
+
+char *filename;
+int fd;
+TEST_OPTION(filename, string, "file name", 1);
+
+#ifndef MADV_GUARD_INSTALL
+#define MADV_GUARD_INSTALL 102
+#endif
+
+uint8_t *map_base;
+
+struct {
+	unsigned int pages_num;
+	bool filemap;
+} vmas[] = {
+	{ 2, false },
+	{ 2, false },
+	{ 2, false },
+	{ 2, true },
+	{ 2, true },
+	{ 2, true },
+};
+
+struct {
+	bool guarded;
+	bool wipeonfork;
+} pages[] = {
+	{ false, false }, /* vmas[0] */
+	{ true, false },
+	{ true, false }, /* vmas[1] */
+	{ false, false },
+	{ false, false }, /* vmas[2] */
+	{ true, true },
+	{ true, false }, /* vmas[3] */
+	{ false, false },
+	{ true, false }, /* vmas[4] */
+	{ true, false },
+	{ false, false }, /* vmas[5] */
+	{ true, false },
+};
+
+static volatile sig_atomic_t signal_jump_set;
+static sigjmp_buf signal_jmp_buf;
+
+static void handle_sigsegv(int signo)
+{
+	/* Unexpected fault: restore the default action so the re-executed access kills us. */
+	if (!signal_jump_set)
+		signal(signo, SIG_DFL);
+	else
+		siglongjmp(signal_jmp_buf, 1);
+}
+
+static bool try_write_to_addr(uint8_t *ptr)
+{
+	bool failed;
+
+	/* Tell signal handler to jump back here on fatal signal. */
+	signal_jump_set = true;
+	/* If a fatal signal arose, we will jump back here and failed is set. */
+	failed = sigsetjmp(signal_jmp_buf, 1) != 0;
+
+	if (!failed)
+		*ptr = 'x';
+
+	signal_jump_set = false;
+	return !failed;
+}
+
+static int setup_sigsegv_handler(void)
+{
+	uint8_t write_me;
+
+	if (signal(SIGSEGV, handle_sigsegv) == SIG_ERR) {
+		pr_perror("setting SIGSEGV handler failed");
+		return 1;
+	}
+
+	/* ensure that try_write_to_addr() works properly */
+	if (!try_write_to_addr(&write_me)) {
+		pr_err("Failed to write at valid addr. Buggy try_write_to_addr()?\n");
+		return 1;
+	}
+
+	if (try_write_to_addr(NULL)) {
+		pr_err("Failed to detect an invalid write. Buggy try_write_to_addr()?\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+static inline void *mmap_pages(void *addr_hint, unsigned int count, bool filemap)
+{
+	char *map;
+
+	map = mmap(addr_hint, count * PAGE_SIZE, PROT_WRITE | PROT_READ,
+		   MAP_PRIVATE | (filemap ? 0 : MAP_ANONYMOUS) | (addr_hint ? MAP_FIXED : 0),
+		   filemap ? fd : -1,
+		   filemap ? (off_t)((intptr_t)addr_hint - (intptr_t)map_base) : 0);
+	if (map == MAP_FAILED || (addr_hint && (map != addr_hint)))
+		return MAP_FAILED;
+
+	return map;
+}
+
+static int __check_guards(const char *when, bool in_child)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pages); i++) {
+		/*
+		 * Skip pages that were never guarded, and also those
+		 * that were, but have MADV_WIPEONFORK which means that
+		 * guards were removed on fork.
+		 */
+		if (!pages[i].guarded || (in_child && pages[i].wipeonfork))
+			continue;
+
+		if (try_write_to_addr(&map_base[i * PAGE_SIZE])) {
+			pr_err("successful write to a guarded area %d %s C/R\n",
+			       i, when);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int check_guards(const char *when)
+{
+	int status;
+	pid_t pid;
+
+	/*
+	 * First of all, check that the guards are in place
+	 * in the main test process.
+	 */
+	if (__check_guards(when, false)) {
+		return 1;
+	}
+
+	/*
+	 * Now, check that the guards are still in place
+	 * after fork(). This ensures that the
+	 * MADV_WIPEONFORK + MADV_GUARD_INSTALL combination
+	 * is restored properly too.
+	 */
+
+	pid = test_fork();
+	if (pid < 0) {
+		pr_perror("check_guards: fork failed");
+		return 1;
+	}
+
+	if (pid == 0) {
+		if (__check_guards(when, true)) {
+			pr_err("check_guards(\"%s\") failed in child\n", when);
+			exit(1);
+		}
+
+		exit(0);
+	}
+
+	if (waitpid(pid, &status, 0) != pid) {
+		pr_perror("check_guards: waitpid");
+		return 1;
+	}
+
+	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+		pr_err("check_guards: process didn't exit cleanly: status=%d\n", status);
+		return 1;
+	}
+
+	return 0;
+}
+
+static void gen_pages_data(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pages); i++) {
+		uint32_t crc;
+
+		if (pages[i].guarded)
+			continue;
+
+		crc = ~0;
+		datagen(&map_base[i * PAGE_SIZE], PAGE_SIZE, &crc);
+	}
+}
+
+static int set_pages_madvs(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pages); i++) {
+		if (pages[i].guarded) {
+			if (madvise(&map_base[i * PAGE_SIZE], PAGE_SIZE,
+				    MADV_GUARD_INSTALL)) {
+				pr_perror("MADV_GUARD_INSTALL failed on page %d", i);
+				return 1;
+			}
+		}
+
+		if (pages[i].wipeonfork) {
+			if (madvise(&map_base[i * PAGE_SIZE], PAGE_SIZE,
+				    MADV_WIPEONFORK)) {
+				pr_perror("MADV_WIPEONFORK failed on page %d", i);
+				return 1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int check_pages_data(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pages); i++) {
+		uint32_t crc;
+
+		if (pages[i].guarded)
+			continue;
+
+		crc = ~0;
+		if (datachk(&map_base[i * PAGE_SIZE], PAGE_SIZE, &crc)) {
+			pr_err("Page %d is corrupted\n", i);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int prepare_vmas(void)
+{
+	char *map;
+	int i, shift;
+
+	shift = 0;
+	for (i = 0; i < ARRAY_SIZE(vmas); i++) {
+		map = mmap_pages(&map_base[shift * PAGE_SIZE],
+				 vmas[i].pages_num, vmas[i].filemap);
+		if (map == MAP_FAILED) {
+			pr_err("mmap of [%d,%d] pages failed\n",
+			       shift, shift + vmas[i].pages_num);
+			return 1;
+		}
+
+		shift += vmas[i].pages_num;
+	}
+
+	if (shift != ARRAY_SIZE(pages)) {
+		pr_err("Different number of pages in vmas and pages arrays.\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	unsigned int pages_num = ARRAY_SIZE(pages);
+
+	test_init(argc, argv);
+
+	fd = open(filename, O_TRUNC | O_CREAT | O_RDWR, 0600);
+	if (fd < 0) {
+		pr_perror("Unable to create a test file");
+		return -1;
+	}
+
+	if (ftruncate(fd, pages_num * PAGE_SIZE)) {
+		pr_perror("Unable to ftruncate a test file");
+		return -1;
+	}
+
+	if (setup_sigsegv_handler()) {
+		pr_err("setup_sigsegv_handler() failed\n");
+		return 1;
+	}
+
+	/* let's find a large enough area in address space */
+	map_base = mmap_pages(NULL, pages_num, false);
+	if (map_base == MAP_FAILED) {
+		pr_err("mmap of %d pages failed\n", pages_num);
+		return 1;
+	}
+
+	/*
+	 * Now we know that we have a free vm address space area
+	 * [map_base, map_base + pages_num * PAGE_SIZE).
+	 * We can use (map_base) as a hint for our further mmaps.
+	 */
+	if (prepare_vmas()) {
+		pr_err("prepare_vmas() failed\n");
+		return 1;
+	}
+
+	/* fill non-guarded pages with data and preserve checksums */
+	gen_pages_data();
+
+	if (set_pages_madvs()) {
+		pr_err("set_pages_madvs() failed\n");
+		return 1;
+	}
+
+	/* ensure that madvise(MADV_GUARD_INSTALL) works as expected */
+	if (check_guards("before")) {
+		pr_err("check_guards(\"before\") failed\n");
+		return 1;
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	/* ensure that the guards are still in place */
+	if (check_guards("after")) {
+		fail("check_guards(\"after\") failed");
+		return 1;
+	}
+
+	/* check that non-guarded pages still contain original data */
+	if (check_pages_data()) {
+		fail("check_pages_data() failed");
+		return 1;
+	}
+
+	pass();
+	munmap(map_base, pages_num * PAGE_SIZE);
+	close(fd);
+	return 0;
+}
diff --git a/test/zdtm/static/maps12.desc b/test/zdtm/static/maps12.desc
new file mode 100644
index 000000000..3f7627ff3
--- /dev/null
+++ b/test/zdtm/static/maps12.desc
@@ -0,0 +1 @@
+{'flavor': 'h', 'feature': 'pagemap_scan_guard_pages'}
diff --git a/test/zdtm/static/membarrier.c b/test/zdtm/static/membarrier.c
new file mode 100644
index 000000000..85d705ba7
--- /dev/null
+++ b/test/zdtm/static/membarrier.c
@@ -0,0 +1,149 @@
+#include <linux/membarrier.h>
+#include <sys/syscall.h>
+#include <stdbool.h>
+#include "zdtmtst.h"
+
+const char *test_doc = "Test membarrier() migration";
+const char *test_author = "Michał Mirosław <emmir@google.com>";
+
+/*
+ * Define membarrier() CMDs to avoid depending on exact kernel header version.
+ */
+#define MEMBARRIER_CMD_GLOBAL_EXPEDITED			    (1 << 1)
+#define MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED	    (1 << 2)
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED		    (1 << 3)
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED	    (1 << 4)
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE	    (1 << 5)
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE (1 << 6)
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ		    (1 << 7)
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ	    (1 << 8)
+#define MEMBARRIER_CMD_GET_REGISTRATIONS		    (1 << 9)
+
+static int membarrier(int cmd, unsigned int flags, int cpu_id)
+{
+	return syscall(__NR_membarrier, cmd, flags, cpu_id);
+}
+
+static const struct {
+	const char *name_suffix;
+	int register_cmd;
+	int execute_cmd;
+} membarrier_cmds[] = {
+	{ "GLOBAL_EXPEDITED",            MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED,
+		MEMBARRIER_CMD_GLOBAL_EXPEDITED },
+	{ "PRIVATE_EXPEDITED",           MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED,
+		MEMBARRIER_CMD_PRIVATE_EXPEDITED },
+	{ "PRIVATE_EXPEDITED_SYNC_CORE", MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE,
+		MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE },
+	{ "PRIVATE_EXPEDITED_RSEQ",      MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ,
+		MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ },
+};
+static const int n_membarrier_cmds = sizeof(membarrier_cmds) / sizeof(*membarrier_cmds);
+
+static int register_membarriers(void)
+{
+	int barriers_supported, barriers_registered;
+	bool all_ok = true;
+
+	barriers_supported = membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
+	if (barriers_supported < 0) {
+		fail("membarrier() not supported by running kernel");
+		return -1;
+	}
+
+	barriers_registered = 0;
+	for (int i = 0; i < n_membarrier_cmds; ++i) {
+		if (~barriers_supported & membarrier_cmds[i].register_cmd)
+			continue;
+
+		barriers_registered |= membarrier_cmds[i].register_cmd;
+
+		if (membarrier(membarrier_cmds[i].register_cmd, 0, 0) < 0) {
+			pr_perror("membarrier(REGISTER_%s)", membarrier_cmds[i].name_suffix);
+			all_ok = false;
+		}
+	}
+
+	if (!all_ok) {
+		fail("can't register membarrier()s - tried %#x, kernel %#x",
+		     barriers_registered, barriers_supported);
+		return -1;
+	}
+
+	if (!barriers_registered) {
+		fail("no known membarrier() cmds are supported by the kernel");
+		return -1;
+	}
+
+	return barriers_registered;
+}
+
+static bool check_membarriers_compat(int barriers_registered)
+{
+	bool all_ok = true;
+
+	for (int i = 0; i < n_membarrier_cmds; ++i) {
+		if (~barriers_registered & membarrier_cmds[i].register_cmd)
+			continue;
+		if (membarrier(membarrier_cmds[i].execute_cmd, 0, 0) < 0) {
+			pr_perror("membarrier(%s)", membarrier_cmds[i].name_suffix);
+			all_ok = false;
+		}
+	}
+
+	if (!all_ok)
+		fail("membarrier() check failed");
+
+	return all_ok;
+}
+
+static bool check_membarriers_get_registrations(int barriers_registered)
+{
+	int ret = membarrier(MEMBARRIER_CMD_GET_REGISTRATIONS, 0, 0);
+	if (ret < 0) {
+		if (errno == EINVAL) { /* command not supported by the running kernel: nothing to compare */
+			test_msg("membarrier(MEMBARRIER_CMD_GET_REGISTRATIONS) not supported by running kernel\n");
+			return true;
+		}
+		fail("membarrier(MEMBARRIER_CMD_GET_REGISTRATIONS)");
+		return false;
+	}
+	if (ret != barriers_registered) { /* reported mask must equal what we registered */
+		fail("MEMBARRIER_CMD_GET_REGISTRATIONS check failed, expected: %d, got: %d",
+		     barriers_registered, ret);
+		return false;
+	}
+
+	return true;
+}
+
+static bool check_membarriers(int barriers_registered)
+{
+	return check_membarriers_compat(barriers_registered) &&
+	       check_membarriers_get_registrations(barriers_registered);
+}
+
+int main(int argc, char **argv)
+{
+	int barriers_registered;
+
+	test_init(argc, argv);
+
+	barriers_registered = register_membarriers();
+	if (barriers_registered < 0)
+		return 1;
+
+	test_msg("Pre-migration membarriers check\n");
+	if (!check_membarriers(barriers_registered))
+		return 1;
+
+	test_daemon();
+	test_waitsig();
+
+	test_msg("Post-migration membarriers check\n");
+	if (!check_membarriers(barriers_registered))
+		return 1;
+
+	pass();
+	return 0;
+}
diff --git a/test/zdtm/static/memfd00.c b/test/zdtm/static/memfd00.c
index d037f6969..8d77ed06e 100644
--- a/test/zdtm/static/memfd00.c
+++ b/test/zdtm/static/memfd00.c
@@ -30,8 +30,10 @@ int main(int argc, char *argv[])
 {
 	int fd, fl_flags1, fl_flags2, fd_flags1, fd_flags2;
 	struct statfs statfs1, statfs2;
+	struct stat stat;
 	off_t pos1, pos2;
 	char buf[5];
+	int fmode1, fmode2;
 
 	test_init(argc, argv);
 
@@ -58,6 +60,13 @@ int main(int argc, char *argv[])
 	if (lseek(fd, pos1, SEEK_SET) < 0)
 		err(1, "seek error");
 
+	if (fchmod(fd, 0642))
+		err(1, "Can't set permission bits");
+
+	if (fstat(fd, &stat) < 0)
+		err(1, "fstat() issue");
+	fmode1 = stat.st_mode;
+
 	test_daemon();
 	test_waitsig();
 
@@ -85,6 +94,15 @@ int main(int argc, char *argv[])
 		return 1;
 	}
 
+	if (fstat(fd, &stat) < 0)
+		err(1, "fstat() issue");
+	fmode2 = stat.st_mode;
+
+	if (fmode1 != fmode2) {
+		fail("stat.st_mode = %#o != %#o", fmode2, fmode1);
+		return 1;
+	}
+
 	pos2 = lseek(fd, 0, SEEK_CUR);
 	if (pos1 != pos2) {
 		fail("position differs");
diff --git a/test/zdtm/static/memfd04.c b/test/zdtm/static/memfd04.c
new file mode 100644
index 000000000..215e949d1
--- /dev/null
+++ b/test/zdtm/static/memfd04.c
@@ -0,0 +1,132 @@
+#include <linux/memfd.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "exec(memfd)";
+const char *test_author = "Michał Mirosław <emmir@google.com>";
+
+static int _memfd_create(const char *name, unsigned int flags)
+{
+	return syscall(SYS_memfd_create, name, flags);
+}
+
+static int _execveat(int dirfd, const char *pathname, const char *const argv[], const char *const envp[], int flags)
+{
+	return syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);
+}
+
+static const char *const script_argv[] = { "true", NULL };
+static const char *const script_env[] = { NULL };
+
+static bool test_exec_fd(int fd)
+{
+	int err, pid, status;
+
+	err = fcntl(fd, F_GETFD);
+	if (err < 0) {
+		fail("fcntl(F_GETFD)");
+		return false;
+	}
+	if (err) {
+		errno = 0;
+		fail("F_GETFD for the memfd returned %d but expected 0", err);
+		return false;
+	}
+
+	pid = fork();
+	if (!pid) {
+		_execveat(fd, "", script_argv, script_env, AT_EMPTY_PATH);
+		err = errno;
+		pr_perror("execveat()");
+		_exit(err);
+	}
+
+	if (pid < 0) {
+		fail("fork()");
+		return false;
+	}
+
+	while (waitpid(pid, &status, 0) != pid) {
+		if (errno == EINTR)
+			continue;
+		fail("waitpid(child=%d)", pid);
+		return false;
+	}
+
+	if (status != 0) {
+		pr_err("child exited with status=%d\n", status);
+		return false;
+	}
+
+	return true;
+}
+
+static const char script[] = "#!/bin/true";
+static const size_t script_len = sizeof(script) - 1;
+
+int main(int argc, char *argv[])
+{
+#ifdef MEMFD05
+	char path[PATH_MAX];
+	char *addr_p, *addr_s;
+	int rofd;
+#endif
+	int fd;
+
+	test_init(argc, argv);
+
+	fd = _memfd_create("somename", 0);
+	if (fd < 0) {
+		pr_perror("memfd_create()");
+		return 1;
+	}
+	if (ftruncate(fd, script_len) == -1) {
+		pr_perror("ftruncate");
+		return 1;
+	}
+	if (write(fd, script, script_len) != script_len) {
+		pr_perror("write(memfd)");
+		return 1;
+	}
+#ifdef MEMFD05
+	snprintf(path, PATH_MAX - 1, "/proc/self/fd/%d", fd);
+	rofd = open(path, O_RDONLY);
+	if (rofd < 0) {
+		pr_perror("unable to open read-only memfd");
+		return 1;
+	}
+	addr_p = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_FILE | MAP_PRIVATE, rofd, 0);
+	if (addr_p == MAP_FAILED) {
+		pr_perror("mmap");
+		return 1;
+	}
+	addr_s = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0);
+	if (addr_s == MAP_FAILED) {
+		pr_perror("mmap");
+		return 1;
+	}
+#endif
+
+	if (!test_exec_fd(fd))
+		return 1;
+
+	test_msg("execveat(memfd) succeeded before C/R.\n");
+
+	test_daemon();
+	test_waitsig();
+
+	if (!test_exec_fd(fd))
+		return 1;
+
+	pass();
+
+	return 0;
+}
diff --git a/test/zdtm/static/memfd04.desc b/test/zdtm/static/memfd04.desc
new file mode 100644
index 000000000..bbf136d14
--- /dev/null
+++ b/test/zdtm/static/memfd04.desc
@@ -0,0 +1 @@
+{'deps': ['/bin/true']}
diff --git a/test/zdtm/static/memfd05.c b/test/zdtm/static/memfd05.c
new file mode 120000
index 000000000..6caa9556f
--- /dev/null
+++ b/test/zdtm/static/memfd05.c
@@ -0,0 +1 @@
+memfd04.c
\ No newline at end of file
diff --git a/test/zdtm/static/memfd05.desc b/test/zdtm/static/memfd05.desc
new file mode 120000
index 000000000..1b4963572
--- /dev/null
+++ b/test/zdtm/static/memfd05.desc
@@ -0,0 +1 @@
+memfd04.desc
\ No newline at end of file
diff --git a/test/zdtm/static/mnt_ext_file_bind_auto.c b/test/zdtm/static/mnt_ext_file_bind_auto.c
new file mode 100644
index 000000000..0c3b9f5fb
--- /dev/null
+++ b/test/zdtm/static/mnt_ext_file_bind_auto.c
@@ -0,0 +1,104 @@
+#include <sys/mount.h>
+#include <linux/limits.h>
+#include <sys/stat.h>
+#include <sched.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Check if external file mount works";
+const char *test_author = "Pavel Tikhomirov <ptikhomirov@virtuozzo.com>";
+
+char *filename = "mnt_ext_file_bind_auto_bind_auto.file";
+TEST_OPTION(filename, string, "file name", 1);
+
+char *source = "mnt_ext_file_bind_auto_bind_auto.source";
+
+int create_file(const char *path)
+{
+	/* Create (or open) the file read-write, then drop the descriptor at once. */
+	int handle = open(path, O_CREAT | O_RDWR, 0644);
+
+	if (handle >= 0) {
+		close(handle);
+		return 0;
+	}
+	/* open() failed: report errno and hand -1 back to the caller. */
+	pr_perror("open");
+	return -1;
+}
+
+int main(int argc, char **argv)
+{
+	char *zdtm_newns = getenv("ZDTM_NEWNS");
+	char *tmp = "/tmp/zdtm_ext_file_bind_auto.tmp";
+	char *sourcefile = "/tmp/zdtm_ext_file_bind_auto.file";
+	char *root, tmpfile[PATH_MAX], testfile[PATH_MAX];
+
+	root = getenv("ZDTM_ROOT");
+	if (root == NULL) {
+		pr_perror("root");
+		return 1;
+	}
+
+	if (!zdtm_newns) {
+		pr_perror("ZDTM_NEWNS is not set");
+		return 1;
+	} else if (strcmp(zdtm_newns, "1")) { /* any value other than "1" skips the mount setup */
+		goto test;
+	}
+
+	/* Prepare file bindmount in criu root (source for external file bindmount) */
+	mkdir(tmp, 0755);
+	if (mount(source, tmp, "tmpfs", 0, NULL)) {
+		pr_perror("mount tmpfs");
+		return 1;
+	}
+	if (mount(NULL, tmp, NULL, MS_PRIVATE, NULL)) {
+		pr_perror("make private");
+		return 1;
+	}
+
+	snprintf(tmpfile, sizeof(tmpfile), "%s/%s", tmp, filename); /* bounded: filename is a test option */
+	if (create_file(tmpfile))
+		return 1;
+
+	if (create_file(sourcefile))
+		return 1;
+
+	if (mount(tmpfile, sourcefile, NULL, MS_BIND, NULL)) {
+		pr_perror("bind");
+		return 1;
+	}
+
+	umount2(tmp, MNT_DETACH); /* the tmpfs mount point is no longer needed once the file is bound */
+
+	/* Prepare file in test root (mount point for external file bindmount) */
+	snprintf(testfile, sizeof(testfile), "%s/%s", root, filename); /* bounded: root comes from the environment */
+	if (create_file(testfile))
+		return 1;
+
+	/*
+	 * Create temporary mntns, next mounts will not show up in criu mntns
+	 * and will be inherited into test mntns
+	 */
+	if (unshare(CLONE_NEWNS)) {
+		pr_perror("unshare");
+		return 1;
+	}
+
+	if (mount(sourcefile, testfile, NULL, MS_BIND, NULL)) {
+		pr_perror("bind");
+		return 1;
+	}
+test:
+	test_init(argc, argv);
+
+	test_daemon();
+	test_waitsig();
+
+	pass();
+	return 0;
+}
diff --git a/test/zdtm/static/mnt_ext_file_bind_auto.desc b/test/zdtm/static/mnt_ext_file_bind_auto.desc
new file mode 100644
index 000000000..825b08127
--- /dev/null
+++ b/test/zdtm/static/mnt_ext_file_bind_auto.desc
@@ -0,0 +1,4 @@
+{   'opts': '--external mnt[]',
+    'feature': 'mnt_id',
+    'flavor': 'ns uns',
+    'flags': 'suid'}
diff --git a/test/zdtm/static/mnt_ro_root.c b/test/zdtm/static/mnt_ro_root.c
new file mode 100644
index 000000000..2d8370150
--- /dev/null
+++ b/test/zdtm/static/mnt_ro_root.c
@@ -0,0 +1,32 @@
+#include <sys/mount.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Check if root mount remains read-only after c/r";
+const char *test_author = "Pavel Tikhomirov <ptikhomirov@virtuozzo.com>";
+
+char *dirname;
+TEST_OPTION(dirname, string, "directory name", 1);
+
+int main(int argc, char **argv)
+{
+	test_init(argc, argv);
+	/* Switch the root mount to read-only; all that is left is to survive C/R. */
+	if (mount(NULL, "/", NULL, MS_REMOUNT | MS_RDONLY | MS_BIND, NULL)) {
+		pr_perror("mount");
+		return 1;
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	/*
+	 * Note: In zdtm.py:check_visible_state() we already check for all
+	 * tests, that all mounts in the test's mount namespace remain the
+	 * same, by comparing mountinfo before and after c/r. So rw/ro mount
+	 * option inconsistency will be detected there and we don't need to
+	 * check it in the test itself.
+	 */
+	pass();
+	return 0;
+}
diff --git a/test/zdtm/static/mnt_ro_root.desc b/test/zdtm/static/mnt_ro_root.desc
new file mode 100644
index 000000000..c9a8e4f18
--- /dev/null
+++ b/test/zdtm/static/mnt_ro_root.desc
@@ -0,0 +1,6 @@
+{
+	'flavor': 'ns uns',
+	'flags': 'suid',
+	'feature': 'mnt_id',
+	'bind': 'zdtm/static',
+}
diff --git a/test/zdtm/static/mnt_root_ext.c b/test/zdtm/static/mnt_root_ext.c
index 6a2eb068c..305e87262 100644
--- a/test/zdtm/static/mnt_root_ext.c
+++ b/test/zdtm/static/mnt_root_ext.c
@@ -51,6 +51,14 @@ int main(int argc, char **argv)
 		return 1;
 	}
 
+	/*
+	 * Make mounts in temporary mntns slave, to prevent propagation to criu mntns
+	 */
+	if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) {
+		pr_perror("make rslave");
+		return 1;
+	}
+
 	/*
 	 * Populate to the tests root host's rootfs subdir
 	 */
diff --git a/test/zdtm/static/mntns_open.c b/test/zdtm/static/mntns_open.c
index 7d8bbbaa4..0430f5b99 100644
--- a/test/zdtm/static/mntns_open.c
+++ b/test/zdtm/static/mntns_open.c
@@ -17,7 +17,7 @@
 #define CLONE_NEWNS 0x00020000
 #endif
 
-const char *test_doc = "Check that mnt_id is repsected";
+const char *test_doc = "Check that mnt_id is respected";
 const char *test_author = "Pavel Emelianov <xemul@parallels.com>";
 
 #define MPTS_FILE "F"
diff --git a/test/zdtm/static/mntns_root_bind.c b/test/zdtm/static/mntns_root_bind.c
index 9e1ba06e6..4c0347cb2 100644
--- a/test/zdtm/static/mntns_root_bind.c
+++ b/test/zdtm/static/mntns_root_bind.c
@@ -71,7 +71,7 @@ int main(int argc, char **argv)
 		task_waiter_wait4(&t, 2);
 
 		if (access(bspath, F_OK)) {
-			fail("%s isn't accessiable", bspath);
+			fail("%s isn't accessible", bspath);
 			return 1;
 		}
 
diff --git a/test/zdtm/static/mount_complex_sharing.c b/test/zdtm/static/mount_complex_sharing.c
index b4463c41a..5f247a8e4 100644
--- a/test/zdtm/static/mount_complex_sharing.c
+++ b/test/zdtm/static/mount_complex_sharing.c
@@ -5,6 +5,7 @@
 #include <sys/mount.h>
 #include <linux/limits.h>
 
+#include "mountinfo.h"
 #include "zdtmtst.h"
 
 const char *test_doc = "Check complex sharing options for mounts";
@@ -211,6 +212,8 @@ static int mount_loop(void)
 
 int main(int argc, char **argv)
 {
+	MNTNS_ZDTM(mntns_before);
+	MNTNS_ZDTM(mntns_after);
 	int ret = 1;
 
 	test_init(argc, argv);
@@ -223,12 +226,23 @@ int main(int argc, char **argv)
 	if (mount_loop())
 		goto err;
 
+	if (mntns_parse_mountinfo(&mntns_before))
+		goto err;
+
 	test_daemon();
 	test_waitsig();
 
+	if (mntns_parse_mountinfo(&mntns_after))
+		goto err;
+
+	if (mntns_compare(&mntns_before, &mntns_after))
+		goto err;
+
 	pass();
 	ret = 0;
 err:
+	mntns_free_all(&mntns_before);
+	mntns_free_all(&mntns_after);
 	if (ret)
 		fail();
 	return ret;
diff --git a/test/zdtm/static/mtime_mmap.c b/test/zdtm/static/mtime_mmap.c
index faa2d6fad..4de8438ee 100644
--- a/test/zdtm/static/mtime_mmap.c
+++ b/test/zdtm/static/mtime_mmap.c
@@ -1,4 +1,5 @@
 #include <stdio.h>
+#include <inttypes.h>
 #include <unistd.h>
 #include <errno.h>
 #include <fcntl.h>
@@ -77,7 +78,7 @@ int main(int argc, char **argv)
 	mtime_new = fst.st_mtime;
 	/* time of last modification */
 	if (mtime_new <= mtime_old) {
-		fail("mtime %ld wasn't updated on mmapped %s file", mtime_new, filename);
+		fail("mtime %" PRId64 " wasn't updated on mmapped %s file", (int64_t)mtime_new, filename);
 		goto failed;
 	}
 
@@ -98,7 +99,7 @@ int main(int argc, char **argv)
 
 	/* time of last modification */
 	if (fst.st_mtime != mtime_new) {
-		fail("After migration, mtime changed to %ld", fst.st_mtime);
+		fail("After migration, mtime changed to %" PRId64, (int64_t)fst.st_mtime);
 		goto failed;
 	}
 
diff --git a/test/zdtm/static/net_lock_socket_iptables.desc b/test/zdtm/static/net_lock_socket_iptables.desc
index 936ff8702..cb622536f 100644
--- a/test/zdtm/static/net_lock_socket_iptables.desc
+++ b/test/zdtm/static/net_lock_socket_iptables.desc
@@ -1,5 +1,6 @@
 {
     'flavor': 'h',
+    'feature': 'has_ipt_legacy',
     'flags': 'suid excl reqrst',
     'dopts': '--tcp-established --network-lock iptables',
     'ropts': '--tcp-established',
diff --git a/test/zdtm/static/net_lock_socket_iptables.hook b/test/zdtm/static/net_lock_socket_iptables.hook
index 0ee147eb2..e9fcd7350 100755
--- a/test/zdtm/static/net_lock_socket_iptables.hook
+++ b/test/zdtm/static/net_lock_socket_iptables.hook
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import socket
 import time
diff --git a/test/zdtm/static/net_lock_socket_iptables6.desc b/test/zdtm/static/net_lock_socket_iptables6.desc
index 936ff8702..cb622536f 100644
--- a/test/zdtm/static/net_lock_socket_iptables6.desc
+++ b/test/zdtm/static/net_lock_socket_iptables6.desc
@@ -1,5 +1,6 @@
 {
     'flavor': 'h',
+    'feature': 'has_ipt_legacy',
     'flags': 'suid excl reqrst',
     'dopts': '--tcp-established --network-lock iptables',
     'ropts': '--tcp-established',
diff --git a/test/zdtm/static/netns-dev.c b/test/zdtm/static/netns-dev.c
index 1e6ee1dea..f268f2fec 100644
--- a/test/zdtm/static/netns-dev.c
+++ b/test/zdtm/static/netns-dev.c
@@ -414,7 +414,7 @@ static int check_stable_secret(struct test_conf *tc)
 		return -1;
 	}
 
-	ret = fscanf(fp, "%s", val);
+	ret = fscanf(fp, "%200s", val);
 	if (ret != 1) {
 		pr_perror("fscanf");
 		fclose(fp);
diff --git a/test/zdtm/static/netns-nf.desc b/test/zdtm/static/netns-nf.desc
index e7e73b1ae..58c23e8ba 100644
--- a/test/zdtm/static/netns-nf.desc
+++ b/test/zdtm/static/netns-nf.desc
@@ -1,6 +1,7 @@
 {   'deps': [   '/bin/sh',
                 '/sbin/iptables|/usr/sbin/iptables',
-                '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/iptables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so',
+                '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/iptables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_standard.so',
                 '/usr/bin/diff'],
     'flags': 'suid',
+    'feature': 'has_ipt_legacy',
     'flavor': 'ns uns'}
diff --git a/test/zdtm/static/netns-nft-ipt.desc b/test/zdtm/static/netns-nft-ipt.desc
index 4120f74d6..6d04589b3 100644
--- a/test/zdtm/static/netns-nft-ipt.desc
+++ b/test/zdtm/static/netns-nft-ipt.desc
@@ -2,7 +2,7 @@
    'deps': [   '/bin/sh',
                '/usr/sbin/nft',
                '/sbin/iptables|/usr/sbin/iptables',
-               '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/iptables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so',
+               '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/iptables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_standard.so',
                '/usr/bin/diff'],
     'flags': 'suid',
     'flavor': 'ns uns'}
diff --git a/test/zdtm/static/netns_lock_iptables.desc b/test/zdtm/static/netns_lock_iptables.desc
index 69020f34e..b465706b8 100644
--- a/test/zdtm/static/netns_lock_iptables.desc
+++ b/test/zdtm/static/netns_lock_iptables.desc
@@ -1,6 +1,7 @@
 {
     'flavor': 'h',
     'flags': 'suid excl reqrst',
+    'feature': 'has_ipt_legacy',
     'opts': '--tcp-established',
     'dopts': '--network-lock iptables',
     'ropts': '--join-ns net:/var/run/netns/criu-net-lock-test'
diff --git a/test/zdtm/static/netns_lock_iptables.hook b/test/zdtm/static/netns_lock_iptables.hook
index e7daf8a65..b51d3c2cc 100755
--- a/test/zdtm/static/netns_lock_iptables.hook
+++ b/test/zdtm/static/netns_lock_iptables.hook
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import subprocess
 import socket
@@ -67,7 +67,7 @@ if sys.argv[1] == "--post-start":
             cln, addr = srv.accept()
             cln.sendall(str.encode("--post-restore"))
             cln.close()
-        
+
         # Server will be closed when zdtm sends SIGKILL
 
 if sys.argv[1] == "--pre-dump":
diff --git a/test/zdtm/static/netns_sub_sysctl.c b/test/zdtm/static/netns_sub_sysctl.c
index 545a17308..03b478b7d 100644
--- a/test/zdtm/static/netns_sub_sysctl.c
+++ b/test/zdtm/static/netns_sub_sysctl.c
@@ -1,20 +1,38 @@
 #include <sched.h>
+#include <fcntl.h>
+#include <unistd.h>
 
 #include "zdtmtst.h"
 #include "sysctl.h"
 
-const char *test_doc = "Check dump and restore a net.unix.max_dgram_qlen sysctl parameter in subns";
+const char *test_doc = "Check dump and restore of sysctls in subns";
 const char *test_author = "Alexander Mikhalitsyn <alexander@mihalicyn.com>";
 
+#define MAX_STR_SYSCTL_LEN 200
+
+enum {
+	SYSCTL_INT,
+	SYSCTL_STR,
+};
+
 typedef struct {
 	const char *path;
+	int type;
 	int old;
 	int new;
+	char s_old[MAX_STR_SYSCTL_LEN];
+	char s_new[MAX_STR_SYSCTL_LEN];
+	bool set;
 } sysctl_opt_t;
 
 #define CONF_UNIX_BASE "/proc/sys/net/unix"
+#define IPV4_SYSCTL_BASE "/proc/sys/net/ipv4"
 
-static sysctl_opt_t net_unix_params[] = { { CONF_UNIX_BASE "/max_dgram_qlen", 0, 0 }, { NULL, 0, 0 } };
+static sysctl_opt_t net_unix_params[] = {
+	{CONF_UNIX_BASE "/max_dgram_qlen", SYSCTL_INT},
+	{IPV4_SYSCTL_BASE "/ping_group_range", SYSCTL_STR, 0, 0, "40000\t50000\n"},
+	{NULL, 0, 0}
+};
 
 int main(int argc, char **argv)
 {
@@ -23,10 +41,22 @@ int main(int argc, char **argv)
 	test_init(argc, argv);
 
 	for (p = net_unix_params; p->path != NULL; p++) {
-		p->old = (((unsigned)lrand48()) % 1023) + 1;
-		if (sysctl_write_int(p->path, p->old)) {
-			pr_perror("Can't change %s", p->path);
-			return -1;
+		if (access(p->path, W_OK) != 0) {
+			test_msg("%s doesn't exist\n", p->path);
+			continue;
+		}
+		p->set = true;
+		if (p->type == SYSCTL_INT) {
+			p->old = (((unsigned)lrand48()) % 1023) + 1;
+			if (sysctl_write_int(p->path, p->old)) {
+				pr_perror("Can't change %s", p->path);
+				return -1;
+			}
+		} else if (p->type == SYSCTL_STR) {
+			if (sysctl_write_str(p->path, p->s_old)) {
+				pr_perror("Can't change %s", p->path);
+				return -1;
+			}
 		}
 	}
 
@@ -34,13 +64,27 @@ int main(int argc, char **argv)
 	test_waitsig();
 
 	for (p = net_unix_params; p->path != NULL; p++) {
-		if (sysctl_read_int(p->path, &p->new))
-			ret = 1;
+		if (!p->set)
+			continue;
+		if (p->type == SYSCTL_INT) {
+			if (sysctl_read_int(p->path, &p->new))
+				ret = 1;
 
-		if (p->old != p->new) {
-			errno = EINVAL;
-			pr_perror("%s changed: %d ---> %d", p->path, p->old, p->new);
-			ret = 1;
+			if (p->old != p->new) {
+				errno = EINVAL;
+				pr_perror("%s changed: %d ---> %d", p->path, p->old, p->new);
+				ret = 1;
+			}
+		} else if (p->type == SYSCTL_STR) {
+			if (sysctl_read_str(p->path, p->s_new, MAX_STR_SYSCTL_LEN)) {
+				ret = 1;
+			} else {
+				if (strcmp(p->s_old, p->s_new)) {
+					errno = EINVAL;
+					pr_perror("%s changed: %s ---> %s", p->path, p->s_old, p->s_new);
+					ret = 1;
+				}
+			}
 		}
 	}
 
diff --git a/test/zdtm/static/netns_sub_sysctl.desc b/test/zdtm/static/netns_sub_sysctl.desc
index 535842668..0c357aefe 100644
--- a/test/zdtm/static/netns_sub_sysctl.desc
+++ b/test/zdtm/static/netns_sub_sysctl.desc
@@ -1,4 +1,4 @@
 {
-    'flavor': 'ns',
+    'flavor': 'ns uns',
     'flags': 'suid'
 }
diff --git a/test/zdtm/static/ofd_file_locks.c b/test/zdtm/static/ofd_file_locks.c
index 68b6f22f5..a68fa38ee 100644
--- a/test/zdtm/static/ofd_file_locks.c
+++ b/test/zdtm/static/ofd_file_locks.c
@@ -16,7 +16,7 @@ static int parse_ofd_lock(char *buf, struct flock *lck)
 	if (strncmp(buf, "lock:\t", 6) != 0)
 		return 1; /* isn't lock, skip record */
 
-	num = sscanf(buf, "%*s %*d: %s %s %s %*d %*x:%*x:%*d %lld %s", fl_flag, fl_type, fl_option, &start, fl_end);
+	num = sscanf(buf, "%*s %*d: %9s %14s %9s %*d %*x:%*x:%*d %lld %31s", fl_flag, fl_type, fl_option, &start, fl_end);
 
 	if (num < 4) {
 		pr_err("Invalid lock info %s\n", buf);
diff --git a/test/zdtm/static/packet_sock.c b/test/zdtm/static/packet_sock.c
index 4a9078f81..c1c94ac21 100644
--- a/test/zdtm/static/packet_sock.c
+++ b/test/zdtm/static/packet_sock.c
@@ -5,7 +5,7 @@ const char *test_author = "Pavel Emelyanov <xemul@parallels.com>";
 
 /*
  * Description:
- *  Create and bind several packet sockets, check thet getname
+ *  Create and bind several packet sockets, check that getname
  *  reports same result before and after c/r cycle. This is enough
  *  for _basic_ packet functionality only, but still.
  */
diff --git a/test/zdtm/static/pidfd_child.c b/test/zdtm/static/pidfd_child.c
new file mode 100644
index 000000000..ec559605d
--- /dev/null
+++ b/test/zdtm/static/pidfd_child.c
@@ -0,0 +1,66 @@
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Checks pidfd sends signal to child process after restore\n";
+const char *test_author = "Bhavik Sachdev <b.sachdev1904@gmail.com>";
+
+static int pidfd_open(pid_t pid, unsigned int flags)
+{
+	return syscall(__NR_pidfd_open, pid, flags); /* invoked directly through syscall(2) */
+}
+
+static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags)
+{
+	return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); /* invoked directly through syscall(2) */
+}
+
+int main(int argc, char* argv[])
+{
+	int pidfd, status;
+	pid_t child;
+
+	test_init(argc, argv);
+	/* The child only waits in test_waitsig() and exits 0 once it returns. */
+	child = fork();
+	if (child < 0) {
+		pr_perror("Unable to fork a new process");
+		return 1;
+	} else if (child == 0) {
+		test_waitsig();
+		return 0;
+	}
+	/* Open the pidfd before C/R so that the restored descriptor is what gets tested. */
+	pidfd = pidfd_open(child, 0);
+	if (pidfd < 0) {
+		pr_perror("pidfd_open failed");
+		return 1;
+	}
+
+	test_daemon();
+	test_waitsig();
+	/* After restore: the pidfd must still be able to signal the child. */
+	if (pidfd_send_signal(pidfd, SIGTERM, NULL, 0)) {
+		fail("Could not send signal");
+		goto err_close;
+	}
+
+	if (waitpid(child, &status, 0) != child) {
+		pr_perror("waitpid()");
+		goto err_close;
+	}
+	/* Any non-zero wait status is a failure; the decoded fields go into the message. */
+	if (status != 0) {
+		fail("%d:%d:%d:%d", WIFEXITED(status), WEXITSTATUS(status), WIFSIGNALED(status), WTERMSIG(status));
+		goto err_close;
+	}
+
+	pass();
+	close(pidfd);
+	return 0;
+err_close:
+	close(pidfd);
+	return 1;
+}
diff --git a/test/zdtm/static/pidfd_dead.c b/test/zdtm/static/pidfd_dead.c
new file mode 100644
index 000000000..9c825899d
--- /dev/null
+++ b/test/zdtm/static/pidfd_dead.c
@@ -0,0 +1,244 @@
+#include <sys/statfs.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Check C/R of pidfds that point to dead processes\n";
+const char *test_author = "Bhavik Sachdev <b.sachdev1904@gmail.com>";
+
+#ifndef PID_FS_MAGIC
+#define PID_FS_MAGIC 0x50494446
+#endif
+
+/*
+ * main
+ *	`- child
+ *		`- grandchild
+ *
+ * main opens a pidfd for both child and grandchild.
+ * Before C/R we kill both child and grandchild.
+ * We end up with two unique dead pidfds.
+ */
+
+static long get_fs_type(int lfd)
+{
+	struct statfs info;
+
+	/* A failed fstatfs() is reported to the caller as -1. */
+	if (fstatfs(lfd, &info) != 0)
+		return -1;
+	return info.f_type;
+}
+
+static int pidfd_open(pid_t pid, unsigned int flags)
+{
+	return syscall(__NR_pidfd_open, pid, flags); /* invoked directly through syscall(2) */
+}
+
+static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags)
+{
+	return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); /* invoked directly through syscall(2) */
+}
+
+static int open_pidfd_pair(int pidfd[2], int pid)
+{
+	pidfd[0] = pidfd_open(pid, 0); /* returns 0 with both slots filled, 1 on failure */
+	if (pidfd[0] < 0) {
+		pr_perror("pidfd_open() failed");
+		return 1;
+	}
+	/* Open a second, independent pidfd for the very same pid. */
+	pidfd[1] = pidfd_open(pid, 0);
+	if (pidfd[1] < 0) {
+		pr_perror("pidfd_open() failed"); /* report first: close() may overwrite errno */
+		close(pidfd[0]);
+		return 1;
+	}
+	return 0;
+}
+
+static int compare_pidfds(int pidfd[2])
+{
+	/*
+	 * Since Linux 6.9 pidfds of the same process share one inode number (it may
+	 * differ before/after C/R). Returns 0 if they match, 1 otherwise or on error.
+	 */
+	struct statx stats[2];
+
+	if (statx(pidfd[0], "", AT_EMPTY_PATH, STATX_ALL, &stats[0]) ||
+	    statx(pidfd[1], "", AT_EMPTY_PATH, STATX_ALL, &stats[1])) {
+		pr_perror("statx");
+		return 1;
+	}
+	return stats[0].stx_ino != stats[1].stx_ino;
+}
+
+static int check_for_pidfs(void)
+{
+	/* 1: our own pidfd reports PID_FS_MAGIC, 0: it does not, -1: pidfd_open() failed. */
+	int self_fd = pidfd_open(getpid(), 0);
+	if (self_fd >= 0) {
+		int on_pidfs = get_fs_type(self_fd) == PID_FS_MAGIC;
+		close(self_fd);
+		return on_pidfs;
+	}
+	pr_perror("pidfd open() failed");
+	return -1;
+}
+
+int main(int argc, char* argv[])
+{
+	#define READ 0
+	#define WRITE 1
+
+	int child, ret, gchild, p[2], status;
+	int cpidfd[2], gpidfd[2];
+	struct statx stats[2];
+
+	test_init(argc, argv);
+
+	ret = check_for_pidfs();
+	if (ret < 0)
+		return 1;
+
+	if (ret == 0) {
+		test_daemon();
+		test_waitsig();
+		skip("Test requires pidfs. skipping...");
+		pass();
+		return 0;
+	}
+	/* The pipe carries the grandchild's pid from the child up to main. */
+	if (pipe(p)) {
+		pr_perror("pipe");
+		return 1;
+	}
+
+	child = test_fork();
+	if (child < 0) {
+		pr_perror("fork");
+		return 1;
+	} else if (child == 0) {
+		int gchild = test_fork();
+		close(p[READ]);
+		if (gchild < 0) {
+			pr_perror("fork");
+			return 1;
+		} else if (gchild == 0) {
+			close(p[WRITE]);
+			while(1)
+				sleep(1000);
+		} else {
+			if (write(p[WRITE], &gchild, sizeof(int)) != sizeof(int)) {
+				pr_perror("write");
+				return 1;
+			}
+			close(p[WRITE]);
+			if (waitpid(gchild, &status, 0) != gchild) {
+				pr_perror("waitpid");
+				return 1;
+			}
+
+			if (!WIFSIGNALED(status)) {
+				fail("Expected grandchild to be terminated by a signal");
+				return 1;
+			}
+
+			if (WTERMSIG(status) != SIGKILL) {
+				fail("Expected grandchild to be terminated by SIGKILL");
+				return 1;
+			}
+
+			return 0;
+		}
+	}
+
+	ret = open_pidfd_pair(cpidfd, child);
+	if (ret)
+		return 1;
+
+	close(p[WRITE]);
+	if (read(p[READ], &gchild, sizeof(int)) != sizeof(int)) {
+		pr_perror("read");
+		return 1;
+	}
+	close(p[READ]);
+
+	ret = open_pidfd_pair(gpidfd, gchild);
+	if (ret)
+		return 1;
+
+	/* Every exit below goes through out_close and counts as failed until pass(). */
+	ret = 1;
+
+	/*
+	 * We kill grandchild and child processes only after opening pidfds.
+	 */
+	if (pidfd_send_signal(gpidfd[0], SIGKILL, NULL, 0)) {
+		pr_perror("pidfd_send_signal");
+		goto out_close;
+	}
+
+	if (waitpid(child, &status, 0) != child) {
+		pr_perror("waitpid");
+		goto out_close;
+	}
+
+	if (!WIFEXITED(status)) {
+		fail("Expected child to exit normally");
+		goto out_close;
+	}
+
+	if (WEXITSTATUS(status) != 0) {
+		fail("Expected child to exit with 0");
+		goto out_close;
+	}
+	usleep(1000);
+
+	if (kill(gchild, 0) != -1 || errno != ESRCH) { /* only "-1 with ESRCH" proves the pid is gone */
+		fail("Expected grand child to not exist");
+		goto out_close;
+	}
+
+	if (kill(child, 0) != -1 || errno != ESRCH) {
+		fail("Expected child to not exist");
+		goto out_close;
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	if (compare_pidfds(cpidfd)) {
+		fail("inodes not same for same pid");
+		goto out_close;
+	}
+
+	if (compare_pidfds(gpidfd)) {
+		fail("inodes not same for same pid");
+		goto out_close;
+	}
+
+	if (statx(cpidfd[0], "", AT_EMPTY_PATH, STATX_ALL, &stats[0]) ||
+	    statx(gpidfd[0], "", AT_EMPTY_PATH, STATX_ALL, &stats[1])) {
+		pr_perror("statx");
+		goto out_close;
+	}
+	if (stats[0].stx_ino == stats[1].stx_ino) {
+		fail("pidfds pointing to diff pids should have diff inodes");
+		goto out_close;
+	}
+
+	pass();
+	ret = 0;
+
+out_close:
+	close(cpidfd[0]);
+	close(cpidfd[1]);
+	close(gpidfd[0]);
+	close(gpidfd[1]);
+	return ret;
+}
diff --git a/test/zdtm/static/pidfd_diffdead.c b/test/zdtm/static/pidfd_diffdead.c
new file mode 100644
index 000000000..5bc1911a5
--- /dev/null
+++ b/test/zdtm/static/pidfd_diffdead.c
@@ -0,0 +1,228 @@
+#include <sys/statfs.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Check C/R of processes that point to a common dead pidfd\n";
+const char *test_author = "Bhavik Sachdev <b.sachdev1904@gmail.com>";
+
+#ifndef PID_FS_MAGIC
+#define PID_FS_MAGIC 0x50494446
+#endif
+
+/*
+ * main
+ *	`- child
+ *		`- grandchild
+ *
+ * main and child open a pidfd for grandchild.
+ * Before C/R we kill grandchild.
+ * We end up with two pidfds in two diff processes that point to the same dead process.
+ */
+
+static long get_fs_type(int lfd)
+{
+	struct statfs info;
+
+	/* A failed fstatfs() is reported to the caller as -1. */
+	if (fstatfs(lfd, &info) != 0)
+		return -1;
+	return info.f_type;
+}
+
+static int pidfd_open(pid_t pid, unsigned int flags)
+{
+	return syscall(__NR_pidfd_open, pid, flags); /* invoked directly through syscall(2) */
+}
+
+static int pidfd_send_signal(int pidfd, int sig, siginfo_t *info, unsigned int flags)
+{
+	return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); /* invoked directly through syscall(2) */
+}
+
+static int check_for_pidfs(void)
+{
+	/* 1: our own pidfd reports PID_FS_MAGIC, 0: it does not, -1: pidfd_open() failed. */
+	int self_fd = pidfd_open(getpid(), 0);
+	if (self_fd >= 0) {
+		int on_pidfs = get_fs_type(self_fd) == PID_FS_MAGIC;
+		close(self_fd);
+		return on_pidfs;
+	}
+	pr_perror("pidfd open() failed");
+	return -1;
+}
+
+int main(int argc, char *argv[])
+{
+#define READ  0
+#define WRITE 1
+
+	int child, ret, gchild, status;
+	struct statx stat;
+	task_waiter_t t;
+	unsigned long long ino;
+
+	/*
+	 * The pipe p is used twice: first the child sends the grandchild's
+	 * pid to main, then after C/R main sends the inode number of its
+	 * pidfd to the child for comparison.
+	 */
+	int p[2];
+	int pidfd;
+
+	test_init(argc, argv);
+	task_waiter_init(&t);
+
+	ret = check_for_pidfs();
+	if (ret < 0)
+		return 1;
+
+	if (ret == 0) {
+		test_daemon();
+		test_waitsig();
+		skip("Test requires pidfs. skipping...");
+		pass();
+		return 0;
+	}
+
+	if (pipe(p)) {
+		pr_perror("pipe");
+		return 1;
+	}
+
+	child = test_fork();
+	if (child < 0) {
+		pr_perror("fork");
+		return 1;
+	} else if (child == 0) {
+		int gchild;
+		gchild = test_fork();
+		if (gchild < 0) {
+			pr_perror("fork");
+			return 1;
+		} else if (gchild == 0) {
+			close(p[READ]);
+			close(p[WRITE]);
+			while (1)
+				sleep(1000);
+		} else {
+			if (write(p[WRITE], &gchild, sizeof(int)) != sizeof(int)) {
+				pr_perror("write");
+				return 1;
+			}
+
+			pidfd = pidfd_open(gchild, 0);
+			if (pidfd < 0) {
+				pr_perror("pidfd_open");
+				return 1;
+			}
+
+			if (waitpid(gchild, &status, 0) != gchild) {
+				pr_perror("waitpid");
+				return 1;
+			}
+
+			if (!WIFSIGNALED(status)) {
+				fail("Expected grandchild to be terminated by a signal");
+				return 1;
+			}
+
+			if (WTERMSIG(status) != SIGKILL) {
+				fail("Expected grandchild to be terminated by SIGKILL");
+				return 1;
+			}
+			task_waiter_complete(&t, 1);
+
+			test_waitsig();
+
+			if (statx(pidfd, "", AT_EMPTY_PATH, STATX_ALL, &stat) < 0) {
+				pr_perror("statx");
+				return 1;
+			}
+
+			close(p[WRITE]);
+			if (read(p[READ], &ino, sizeof(ino)) != sizeof(ino)) {
+				pr_perror("read");
+				return 1;
+			}
+			close(p[READ]);
+			close(pidfd);
+
+			/* ino number should be same because both pidfds were for the same process */
+			if (ino != stat.stx_ino) {
+				exit(1);
+			}
+			exit(0);
+		}
+	}
+
+	if (read(p[READ], &gchild, sizeof(int)) != sizeof(int)) {
+		pr_perror("read");
+		return 1;
+	}
+
+	pidfd = pidfd_open(gchild, 0);
+	if (pidfd < 0) {
+		pr_perror("pidfd_open");
+		return 1;
+	}
+
+	/*
+	 * We kill grandchild process only after opening pidfd.
+	 */
+	if (pidfd_send_signal(pidfd, SIGKILL, NULL, 0)) {
+		pr_perror("pidfd_send_signal");
+		return 1;
+	}
+
+	/* Wait for child to waitpid on gchild */
+	task_waiter_wait4(&t, 1);
+
+	test_daemon();
+	test_waitsig();
+
+	close(p[READ]);
+	if (statx(pidfd, "", AT_EMPTY_PATH, STATX_ALL, &stat) < 0) {
+		pr_perror("statx");
+		goto err;
+	}
+
+	/* Send inode number of pidfd to child for comparison */
+	if (write(p[WRITE], &stat.stx_ino, sizeof(stat.stx_ino)) != sizeof(stat.stx_ino)) {
+		pr_perror("write");
+		goto err;
+	}
+	close(p[WRITE]);
+
+	if (kill(child, SIGTERM)) {
+		pr_perror("kill");
+		goto err;
+	}
+
+	if (waitpid(child, &status, 0) != child) {
+		pr_perror("waitpid");
+		goto err;
+	}
+
+	if (!WIFEXITED(status)) {
+		fail("Expected child to terminate normally");
+		goto err;
+	}
+
+	if (WEXITSTATUS(status) != 0) {
+		fail("Child failed");
+		goto err;
+	}
+
+	pass();
+	close(pidfd);
+	return 0;
+err:
+	close(pidfd);
+	return 1;
+}
diff --git a/test/zdtm/static/pidfd_kill.c b/test/zdtm/static/pidfd_kill.c
new file mode 100644
index 000000000..6232d033a
--- /dev/null
+++ b/test/zdtm/static/pidfd_kill.c
@@ -0,0 +1,128 @@
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Kill child and grandchild process using pidfds\n";
+const char *test_author = "Bhavik Sachdev <b.sachdev1904@gmail.com>";
+
+static int pidfd_open(pid_t pid, unsigned int flags)
+{
+	return syscall(__NR_pidfd_open, pid, flags); /* invoked directly through syscall(2) */
+}
+
+static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags)
+{
+	return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); /* invoked directly through syscall(2) */
+}
+
+static int wait_for_child(int child)
+{
+	int status;
+	if (waitpid(child, &status, 0) != child) {
+		pr_perror("waitpid()");
+		return 1;
+	}
+	/* A non-zero status is only logged: main() SIGKILLs the children it waits for. */
+	if (status != 0) {
+		test_msg("%d:%d:%d:%d\n", WIFEXITED(status), WEXITSTATUS(status),
+			WIFSIGNALED(status), WTERMSIG(status));
+	}
+	/* Only a failing waitpid() counts as an error here. */
+	return 0;
+}
+
+int main(int argc, char* argv[])
+{
+	#define READ 0
+	#define WRITE 1
+
+	int child, gchild, cpidfd, gpidfd, gchild_pid, ret;
+	int p[2];
+	/* The pipe hands the grandchild's pid from the child over to main. */
+	if (pipe(p)) {
+		pr_perror("pipe");
+		return 1;
+	}
+
+	test_init(argc, argv);
+
+	child = fork();
+	if (child < 0) {
+		pr_perror("fork");
+		return 1;
+	}
+
+	if (child == 0) {
+		gchild = fork();
+		if (gchild < 0) {
+			pr_perror("fork");
+			return 1;
+		}
+
+		if (gchild == 0) {
+			test_waitsig();
+			return 0;
+		}
+
+		close(p[READ]);
+		if (write(p[WRITE], &gchild, sizeof(gchild))
+			!= sizeof(gchild)) {
+			pr_perror("write");
+			return 1;
+		}
+		close(p[WRITE]);
+
+		test_waitsig();
+		return wait_for_child(gchild);
+	}
+	/* Parent: open one pidfd per descendant before C/R. */
+	cpidfd = pidfd_open(child, 0);
+	if (cpidfd < 0) {
+		pr_perror("pidfd_open");
+		return 1;
+	}
+
+	close(p[WRITE]);
+	if (read(p[READ], &gchild_pid, sizeof(gchild_pid))
+		!= sizeof(gchild_pid)) {
+		pr_perror("read");
+		return 1;
+	}
+	close(p[READ]);
+
+	gpidfd = pidfd_open(gchild_pid, 0);
+	if (gpidfd < 0) {
+		pr_perror("pidfd_open");
+		return 1;
+	}
+
+	test_daemon();
+	test_waitsig();
+	/* After restore: both pidfds must still be able to deliver SIGKILL. */
+	if (pidfd_send_signal(gpidfd, SIGKILL, NULL, 0)) {
+		pr_perror("Could not send signal");
+		goto fail_close;
+	}
+
+	if (pidfd_send_signal(cpidfd, SIGKILL, NULL, 0)) {
+		pr_perror("Could not send signal");
+		goto fail_close;
+	}
+
+	ret = wait_for_child(child);
+	if (ret)
+		goto fail_close;
+
+	pass();
+	close(cpidfd);
+	close(gpidfd);
+	return 0;
+
+fail_close:
+	fail();
+	close(cpidfd);
+	close(gpidfd);
+	return 1;
+}
diff --git a/test/zdtm/static/pidfd_of_thread.c b/test/zdtm/static/pidfd_of_thread.c
new file mode 100644
index 000000000..d232c7ac1
--- /dev/null
+++ b/test/zdtm/static/pidfd_of_thread.c
@@ -0,0 +1,114 @@
+#include <sched.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <fcntl.h>
+
+#include "zdtmtst.h"
+#include "lock.h"
+
+const char *test_doc = "Check C/R of pidfds that point to threads\n";
+const char *test_author = "Bhavik Sachdev <b.sachdev1904@gmail.com>";
+
+/* see also: https://codebrowser.dev/glibc/glibc/sysdeps/unix/sysv/linux/tst-clone3.c.html */
+
+#ifndef PIDFD_THREAD
+#define PIDFD_THREAD	O_EXCL
+#endif
+
+#ifndef PIDFD_SIGNAL_THREAD
+#define PIDFD_SIGNAL_THREAD		(1UL << 0)
+#endif
+
+#ifndef PID_FS_MAGIC
+#define PID_FS_MAGIC 0x50494446
+#endif
+
+static long get_fs_type(int lfd)
+{
+	struct statfs fst;
+
+	if (fstatfs(lfd, &fst)) {
+		return -1;
+	}
+	return fst.f_type;
+}
+
+static int pidfd_open(pid_t pid, unsigned int flags)
+{
+	return syscall(__NR_pidfd_open, pid, flags);
+}
+
+static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags)
+{
+	return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
+}
+
+static int thread_func(void *a)
+{
+	test_waitsig();
+	return 0;
+}
+
+#define CTID_INIT_VAL 1
+
+int main(int argc, char* argv[])
+{
+	char st[64 * 1024] __attribute__ ((aligned));
+	pid_t tid;
+	int pidfd, test_pidfd;
+	futex_t exited;
+
+	int clone_flags = CLONE_THREAD;
+	clone_flags |= CLONE_VM | CLONE_SIGHAND;
+	clone_flags |= CLONE_CHILD_CLEARTID;
+
+	test_init(argc, argv);
+
+	test_pidfd = pidfd_open(getpid(), 0);
+	if (test_pidfd < 0) {
+		pr_perror("pidfd_open() failed");
+		return 1;
+	}
+
+	/* PIDFD_THREAD, PIDFD_SIGNAL_THREAD are supported only with pidfs */
+	if (get_fs_type(test_pidfd) != PID_FS_MAGIC) {
+		test_daemon();
+		test_waitsig();
+		skip("pidfs not supported.");
+		close(test_pidfd);
+		return 0;
+	}
+	close(test_pidfd);
+
+	futex_set(&exited, CTID_INIT_VAL); /* stays non-zero while the thread is alive */
+
+	tid = clone(thread_func, st + sizeof(st), clone_flags, NULL, NULL, NULL, &(exited.raw));
+	if (tid == -1) {
+		pr_perror("clone() failed");
+		return 1;
+	}
+
+	test_msg("Successfully created a thread with tid: %d\n", tid);
+	pidfd = pidfd_open(tid, PIDFD_THREAD);
+	if (pidfd < 0) {
+		pr_perror("pidfd_open() failed");
+		return 1;
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	if (pidfd_send_signal(pidfd, SIGTERM, NULL, PIDFD_SIGNAL_THREAD)) {
+		pr_perror("pidfd_send_signal() failed");
+		fail();
+		close(pidfd);
+		return 1;
+	}
+
+	test_msg("Waiting for thread to exit\n");
+	futex_wait_until(&exited, 0); /* CLONE_CHILD_CLEARTID zeroes it on thread exit */
+
+	pass();
+	close(pidfd);
+	return 0;
+}
diff --git a/test/zdtm/static/pidfd_of_thread.desc b/test/zdtm/static/pidfd_of_thread.desc
new file mode 100644
index 000000000..802caed65
--- /dev/null
+++ b/test/zdtm/static/pidfd_of_thread.desc
@@ -0,0 +1 @@
+{'flags': 'noauto crfail'}
diff --git a/test/zdtm/static/pidfd_self.c b/test/zdtm/static/pidfd_self.c
new file mode 100644
index 000000000..2730ee123
--- /dev/null
+++ b/test/zdtm/static/pidfd_self.c
@@ -0,0 +1,140 @@
+#include <sys/syscall.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Check pidfd /proc/self/fdinfo/<pidfd> entry remains consistent after checkpoint/restore\n";
+const char *test_author = "Bhavik Sachdev <b.sachdev1904@gmail.com>";
+
+struct pidfd_status {
+	unsigned int flags;
+	pid_t pid;
+};
+
+static int pidfd_open(pid_t pid, unsigned int flags)
+{
+	return syscall(__NR_pidfd_open, pid, flags);
+}
+
+static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags)
+{
+	return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
+}
+
+static void show_pidfd(char *prefix, struct pidfd_status *s)
+{
+	test_msg("\n\t%s\n\tflags: 0%o\n\tpid: %d\n", prefix, s->flags, s->pid);
+}
+
+/*
+ * Parse /proc/self/fdinfo/<pidfd> and store its "flags:" and "Pid:" values
+ * in @s, which is zeroed first.
+ * Returns 0 on success, -1 if the file can't be opened or parsed.
+ */
+static int parse_self_fdinfo(int pidfd, struct pidfd_status *s)
+{
+	char buf[256];
+	int ret = -1;
+	FILE *f;
+
+	snprintf(buf, sizeof(buf), "/proc/self/fdinfo/%d", pidfd);
+	f = fopen(buf, "r");
+	if (!f) {
+		pr_perror("Can't open %s to parse", buf);
+		return -1;
+	}
+
+	memset(s, 0, sizeof(*s));
+
+	/*
+	 * The first line is skipped. The second one holds the file access mode
+	 * in octal, e.g. "flags: 02000002" for O_RDWR | O_CLOEXEC.
+	 */
+	if (!fgets(buf, sizeof(buf), f))
+		goto parse_err;
+	if (!fgets(buf, sizeof(buf), f))
+		goto parse_err;
+	if (sscanf(buf, "flags: 0%o", &s->flags) != 1)
+		goto parse_err;
+
+	/* Two more lines are skipped; the fifth one is "Pid: <pid the pidfd refers to>". */
+	if (!fgets(buf, sizeof(buf), f))
+		goto parse_err;
+	if (!fgets(buf, sizeof(buf), f))
+		goto parse_err;
+	if (!fgets(buf, sizeof(buf), f))
+		goto parse_err;
+	if (sscanf(buf, "Pid: %d", &s->pid) != 1)
+		goto parse_err;
+
+	ret = 0;
+err:
+	fclose(f);
+	return ret;
+
+parse_err:
+	/* pr_err(), not pr_perror(): a malformed file does not set errno. */
+	pr_err("Unexpected format of /proc/self/fdinfo/%d\n", pidfd);
+	goto err;
+}
+
+static int check_pidfd(int fd, struct pidfd_status *old)
+{
+	struct pidfd_status new;
+
+	if (parse_self_fdinfo(fd, &new))
+		return -1;
+
+	show_pidfd("restored", &new);
+
+	if (old->flags != new.flags || old->pid != new.pid)
+		return -1;
+
+	return 0;
+}
+
+int main(int argc, char* argv[])
+{
+	struct pidfd_status old;
+	int pidfd;
+
+	test_init(argc, argv);
+
+	pidfd = pidfd_open(getpid(), 0);
+	if (pidfd < 0) {
+		pr_perror("pidfd_open failed");
+		return 1;
+	}
+
+	/* Snapshot fdinfo before C/R; without it there is nothing to compare against. */
+	if (parse_self_fdinfo(pidfd, &old))
+		goto err;
+	show_pidfd("old", &old);
+
+	if (pidfd_send_signal(pidfd, 0, NULL, 0)) {
+		pr_perror("Could not send signal");
+		goto err;
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	if (check_pidfd(pidfd, &old)) { /* fdinfo unreadable or changed across C/R */
+		fail();
+		goto err;
+	}
+
+	if (pidfd_send_signal(pidfd, 0, NULL, 0)) {
+		pr_perror("Could not send signal");
+		fail();
+		goto err;
+	}
+
+	pass();
+	close(pidfd);
+	return 0;
+err:
+	close(pidfd);
+	return 1;
+}
diff --git a/test/zdtm/static/pthread00-pac.c b/test/zdtm/static/pthread00-pac.c
new file mode 120000
index 000000000..3ee8dc1f1
--- /dev/null
+++ b/test/zdtm/static/pthread00-pac.c
@@ -0,0 +1 @@
+pthread00.c
\ No newline at end of file
diff --git a/test/zdtm/static/pthread_timers.c b/test/zdtm/static/pthread_timers.c
index 5246a985f..b1b2a9a23 100644
--- a/test/zdtm/static/pthread_timers.c
+++ b/test/zdtm/static/pthread_timers.c
@@ -1,5 +1,6 @@
 #include <errno.h>
 #include <stdlib.h>
+#include <inttypes.h>
 #include <string.h>
 #include <signal.h>
 #include <time.h>
@@ -69,7 +70,8 @@ int main(int argc, char **argv)
 	}
 
 	if (itimerspec.it_interval.tv_nsec != TEST_INTERVAL_NSEC || itimerspec.it_interval.tv_sec) {
-		pr_perror("wrong interval: %ld:%ld", itimerspec.it_interval.tv_sec, itimerspec.it_interval.tv_nsec);
+		pr_perror("wrong interval: %" PRId64 ":%" PRId64,
+			  (int64_t)itimerspec.it_interval.tv_sec, (int64_t)itimerspec.it_interval.tv_nsec);
 		return 1;
 	}
 
diff --git a/test/zdtm/static/rseq00.c b/test/zdtm/static/rseq00.c
index 471ad6a43..7add7801e 100644
--- a/test/zdtm/static/rseq00.c
+++ b/test/zdtm/static/rseq00.c
@@ -46,12 +46,15 @@ static inline void *__criu_thread_pointer(void)
 static inline void unregister_glibc_rseq(void)
 {
 	struct rseq *rseq = (struct rseq *)((char *)__criu_thread_pointer() + __rseq_offset);
+	unsigned int size = __rseq_size;
 
 	/* hack: mark glibc rseq structure as failed to register */
 	rseq->cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
 
 	/* unregister rseq */
-	syscall(__NR_rseq, (void *)rseq, __rseq_size, 1, RSEQ_SIG);
+	if (__rseq_size < 32)
+		size = 32;
+	syscall(__NR_rseq, (void *)rseq, size, 1, RSEQ_SIG);
 }
 #else
 static inline void unregister_glibc_rseq(void)
diff --git a/test/zdtm/static/s390x_regs_check.c b/test/zdtm/static/s390x_regs_check.c
index 40c480b3f..82dca0519 100644
--- a/test/zdtm/static/s390x_regs_check.c
+++ b/test/zdtm/static/s390x_regs_check.c
@@ -40,13 +40,13 @@ const char *test_author = "Michael Holzheu <holzheu@linux.vnet.ibm.com>";
  *
  * - Verify that "criu restore" sets the correct register sets
  *   from "criu dump":
- *   $ zdtmp.py run -t zdtm/static/s390x_regs_check
+ *   $ zdtm.py run -t zdtm/static/s390x_regs_check
  *
  * - Verify that dumpee continues running with correct registers after
  *   parasite injection:
- *   $ zdtmp.py run --norst -t zdtm/static/s390x_regs_check
- *   $ zdtmp.py run --norst --pre 2 -t zdtm/static/s390x_regs_check
- *   $ zdtmp.py run --check-only -t zdtm/static/s390x_regs_check
+ *   $ zdtm.py run --norst -t zdtm/static/s390x_regs_check
+ *   $ zdtm.py run --norst --pre 2 -t zdtm/static/s390x_regs_check
+ *   $ zdtm.py run --check-only -t zdtm/static/s390x_regs_check
  */
 #define NR_THREADS     2
 #define NR_THREADS_ALL (NR_THREADS + 1)
diff --git a/test/zdtm/static/sched_policy00.c b/test/zdtm/static/sched_policy00.c
index dc71eed94..a35135050 100644
--- a/test/zdtm/static/sched_policy00.c
+++ b/test/zdtm/static/sched_policy00.c
@@ -51,7 +51,7 @@ int main(int argc, char **argv)
 	}
 
 	p.sched_priority = param;
-	if (sched_setscheduler(pid, SCHED_RR, &p)) {
+	if (sched_setscheduler(pid, SCHED_RR | SCHED_RESET_ON_FORK, &p)) {
 		pr_perror("Can't set policy");
 		kill(pid, SIGKILL);
 		return -1;
@@ -61,7 +61,7 @@ int main(int argc, char **argv)
 	test_waitsig();
 
 	ret = sched_getscheduler(pid);
-	if (ret != SCHED_RR) {
+	if (ret != (SCHED_RR | SCHED_RESET_ON_FORK)) {
 		fail("Broken/No policy");
 		err++;
 	}
diff --git a/test/zdtm/static/scm00.c b/test/zdtm/static/scm00.c
index d66975582..670e6fd6a 100644
--- a/test/zdtm/static/scm00.c
+++ b/test/zdtm/static/scm00.c
@@ -105,6 +105,9 @@ int main(int argc, char **argv)
 	p[1] = p[0];
 	p[0] = -1;
 #endif
+#endif
+#ifdef CLOSE_SENDER_FD
+	close(sk[0]);
 #endif
 
 	test_daemon();
diff --git a/test/zdtm/static/scm06.desc b/test/zdtm/static/scm06.desc
index 2eac7e654..38cc3be51 100644
--- a/test/zdtm/static/scm06.desc
+++ b/test/zdtm/static/scm06.desc
@@ -1 +1,4 @@
-{'flags': 'suid'}
+# This test isn't executed in the host flavor (i.e. in the same network
+# namespace), because the kernel releases a test socket asynchronously, so the
+# restore can fail if it runs before the kernel actually destroys the socket.
+{'flags': 'suid', 'flavor': 'ns uns'}
diff --git a/test/zdtm/static/scm09.c b/test/zdtm/static/scm09.c
new file mode 120000
index 000000000..4cab0edd2
--- /dev/null
+++ b/test/zdtm/static/scm09.c
@@ -0,0 +1 @@
+scm00.c
\ No newline at end of file
diff --git a/test/zdtm/static/seccomp_filter_inheritance.c b/test/zdtm/static/seccomp_filter_inheritance.c
index 7a86cd85e..5afcb3f84 100644
--- a/test/zdtm/static/seccomp_filter_inheritance.c
+++ b/test/zdtm/static/seccomp_filter_inheritance.c
@@ -100,7 +100,7 @@ int main(int argc, char **argv)
 		if (filter_syscall(__NR_ptrace) < 0)
 			_exit(1);
 
-		if (filter_syscall(__NR_fstat) < 0)
+		if (filter_syscall(__NR_statx) < 0)
 			_exit(1);
 
 		zdtm_seccomp = 1;
diff --git a/test/zdtm/static/seccomp_no_new_privs.c b/test/zdtm/static/seccomp_no_new_privs.c
new file mode 100644
index 000000000..95f9501ed
--- /dev/null
+++ b/test/zdtm/static/seccomp_no_new_privs.c
@@ -0,0 +1,42 @@
+#include <stdlib.h>
+#include <sys/prctl.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Check that NO_NEW_PRIVS attribute is restored";
+const char *test_author = "Michał Mirosław <emmir@google.com>";
+
+int main(int argc, char **argv)
+{
+	int ret;
+
+	test_init(argc, argv);
+
+	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+	if (ret < 0) {
+		pr_perror("Can't read NO_NEW_PRIVS attribute");
+		return 1;
+	}
+	if (ret != 0) { /* already set: the check after restore would prove nothing */
+		fail("initial NO_NEW_PRIVS = %d != 0", ret);
+		return 1;
+	}
+	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		pr_perror("Can't set NO_NEW_PRIVS attribute");
+		return 1;
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+	if (ret < 0) {
+		pr_perror("Can't read NO_NEW_PRIVS attribute");
+		return 1;
+	}
+	if (ret != 1)
+		fail("restored NO_NEW_PRIVS = %d != 1", ret);
+	else
+		pass(); /* never report PASS once fail() has been logged */
+	return ret != 1;
+}
diff --git a/test/zdtm/static/selinux00.checkskip b/test/zdtm/static/selinux00.checkskip
index 8d946a75e..4c85647d1 100755
--- a/test/zdtm/static/selinux00.checkskip
+++ b/test/zdtm/static/selinux00.checkskip
@@ -2,6 +2,19 @@
 
 test -d /sys/fs/selinux || exit 1
 
+# check if necessary commands are installed
+if ! command -v setenforce &>/dev/null; then
+	exit 1
+fi
+
+if ! command -v setsebool &>/dev/null; then
+	exit 1
+fi
+
+if ! command -v getsebool &>/dev/null; then
+	exit 1
+fi
+
 # See selinux00.hook for details
 
 getsebool unconfined_dyntrans_all > /dev/null 2>&1
diff --git a/test/zdtm/static/shm-hugetlb.checkskip b/test/zdtm/static/shm-hugetlb.checkskip
new file mode 100755
index 000000000..df2370815
--- /dev/null
+++ b/test/zdtm/static/shm-hugetlb.checkskip
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+# will fail with EOPNOTSUPP
+cat /proc/sys/vm/nr_hugepages &> /dev/null
diff --git a/test/zdtm/static/sk-unix-listen01.c b/test/zdtm/static/sk-unix-listen01.c
new file mode 100644
index 000000000..5c9274acb
--- /dev/null
+++ b/test/zdtm/static/sk-unix-listen01.c
@@ -0,0 +1,117 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <sys/un.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <limits.h>
+#include <fcntl.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Test in-flight unix sockets with data in them\n";
+const char *test_author = "Andrei Vagin <avagin@gmail.com>";
+
+#define SK_DATA "packet"
+
+char *filename;
+TEST_OPTION(filename, string, "socket file name", 1);
+
+#define TEST_MODE 0640
+
+#ifdef ZDTM_UNIX_SEQPACKET
+#define SOCK_TYPE SOCK_SEQPACKET
+#else
+#define SOCK_TYPE SOCK_STREAM
+#endif
+
+int main(int argc, char *argv[])
+{
+	struct sockaddr_un addr;
+	unsigned int addrlen;
+	int ssk, sk;
+
+	char path[PATH_MAX];
+	char *cwd;
+	int ret;
+
+	test_init(argc, argv);
+
+	cwd = get_current_dir_name();
+	if (!cwd)
+		return pr_perror("get_current_dir_name");
+
+	snprintf(path, sizeof(path), "%s/%s", cwd, filename);
+	free(cwd); /* malloc()ed by get_current_dir_name(), only needed to build path */
+	unlink(path);
+
+	addr.sun_family = AF_UNIX;
+	addrlen = strlen(filename);
+	if (addrlen > sizeof(addr.sun_path))
+		return pr_err("address is too long");
+	memcpy(addr.sun_path, filename, addrlen);
+	addrlen += sizeof(addr.sun_family);
+
+	ssk = socket(AF_UNIX, SOCK_TYPE, 0);
+	if (ssk == -1)
+		return pr_perror("socket");
+
+	sk = socket(AF_UNIX, SOCK_TYPE, 0);
+	if (sk < 0)
+		return pr_perror("socket");
+
+	ret = bind(ssk, (struct sockaddr *)&addr, addrlen);
+	if (ret)
+		return pr_perror("bind");
+
+	ret = listen(ssk, 16);
+	if (ret)
+		return pr_perror("listen");
+
+	if (connect(sk, (struct sockaddr *)&addr, addrlen))
+		return pr_perror("connect");
+
+#ifdef SK_UNIX_LISTEN02
+	/* Queue data on the connection while it is still waiting to be accepted. */
+	if (write(sk, SK_DATA, sizeof(SK_DATA)) != sizeof(SK_DATA))
+		return pr_perror("write");
+#endif
+
+#ifdef SK_UNIX_LISTEN03
+	close(sk);
+	sk = -1;
+#endif
+
+	test_daemon();
+	test_waitsig();
+
+	if (sk != -1)
+		close(sk);
+
+	ret = accept(ssk, NULL, NULL);
+	if (ret < 0)
+		return fail("accept");
+
+#ifdef SK_UNIX_LISTEN02
+	{
+		char buf[64];
+		if (read(ret, &buf, sizeof(buf)) != sizeof(SK_DATA))
+			return pr_perror("read");
+
+		if (strcmp(buf, SK_DATA))
+			return fail("data corrupted");
+	}
+#endif
+
+	close(ret);
+	close(ssk);
+	unlink(path);
+
+	pass();
+	return 0;
+}
diff --git a/test/zdtm/static/sk-unix-listen02.c b/test/zdtm/static/sk-unix-listen02.c
new file mode 120000
index 000000000..1211f4666
--- /dev/null
+++ b/test/zdtm/static/sk-unix-listen02.c
@@ -0,0 +1 @@
+sk-unix-listen01.c
\ No newline at end of file
diff --git a/test/zdtm/static/unlink_largefile.desc b/test/zdtm/static/sk-unix-listen02.desc
similarity index 100%
rename from test/zdtm/static/unlink_largefile.desc
rename to test/zdtm/static/sk-unix-listen02.desc
diff --git a/test/zdtm/static/sk-unix-listen03.c b/test/zdtm/static/sk-unix-listen03.c
new file mode 120000
index 000000000..1211f4666
--- /dev/null
+++ b/test/zdtm/static/sk-unix-listen03.c
@@ -0,0 +1 @@
+sk-unix-listen01.c
\ No newline at end of file
diff --git a/test/zdtm/static/sk-unix-listen03.desc b/test/zdtm/static/sk-unix-listen03.desc
new file mode 100644
index 000000000..ded89879a
--- /dev/null
+++ b/test/zdtm/static/sk-unix-listen03.desc
@@ -0,0 +1 @@
+{'flags': 'crfail'}
diff --git a/test/zdtm/static/sk-unix-listen04.c b/test/zdtm/static/sk-unix-listen04.c
new file mode 120000
index 000000000..1211f4666
--- /dev/null
+++ b/test/zdtm/static/sk-unix-listen04.c
@@ -0,0 +1 @@
+sk-unix-listen01.c
\ No newline at end of file
diff --git a/test/zdtm/static/sk-unix-listen04.desc b/test/zdtm/static/sk-unix-listen04.desc
new file mode 100644
index 000000000..ded89879a
--- /dev/null
+++ b/test/zdtm/static/sk-unix-listen04.desc
@@ -0,0 +1 @@
+{'flags': 'crfail'}
diff --git a/test/zdtm/static/sk-unix-restore-fs-share.c b/test/zdtm/static/sk-unix-restore-fs-share.c
new file mode 100644
index 000000000..d4f6dde75
--- /dev/null
+++ b/test/zdtm/static/sk-unix-restore-fs-share.c
@@ -0,0 +1,196 @@
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Test non-empty process group with terminated parent and unix socket";
+const char *test_author = "Qiao Ma <mqaio@linux.alibaba.com>";
+
+char *filename;
+TEST_OPTION(filename, string, "socket file name", 1);
+
+static int create_and_connect(void)
+{
+	struct sockaddr_un addr;
+	int client_fd;
+
+	client_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (client_fd == -1) {
+		pr_perror("socket");
+		return -1;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	if (snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", filename) >= (int)sizeof(addr.sun_path)) {
+		pr_err("Socket path too long\n");
+		close(client_fd);
+		return -1;
+	}
+
+	if (connect(client_fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+		pr_perror("connect");
+		close(client_fd);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int child(int ready_fd)
+{
+	int listen_fd;
+	struct sockaddr_un addr;
+	int ret = EXIT_FAILURE;
+
+	listen_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (listen_fd == -1) {
+		pr_perror("socket");
+		return EXIT_FAILURE;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	if (strlen(filename) >= sizeof(addr.sun_path)) {
+		pr_err("Socket path too long\n");
+		goto cleanup;
+	}
+	strncpy(addr.sun_path, filename, sizeof(addr.sun_path));
+
+	unlink(filename); /* Ignore error if file doesn't exist */
+
+	if (bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+		pr_perror("bind");
+		goto cleanup;
+	}
+
+	if (listen(listen_fd, 5) == -1) {
+		pr_perror("listen");
+		goto cleanup;
+	}
+
+	if (create_and_connect() != 0) {
+		pr_err("Failed to create and connect\n");
+		goto cleanup;
+	}
+
+	/* Signal parent that socket is ready */
+	if (write(ready_fd, "1", 1) != 1) {
+		pr_perror("write ready_fd");
+		goto cleanup;
+	}
+
+	/* Wait indefinitely */
+	pause();
+
+	ret = EXIT_SUCCESS;
+cleanup:
+	if (listen_fd != -1)
+		close(listen_fd);
+	unlink(filename);
+
+	return ret;
+}
+
+static int zombie_leader(int *cpid)
+{
+	char buf;
+	pid_t pid;
+	int pipefd[2];
+
+	if (pipe(pipefd) == -1) {
+		pr_perror("pipe");
+		return EXIT_FAILURE;
+	}
+
+	if (setpgid(0, 0) == -1) {
+		pr_perror("setpgid");
+		return EXIT_FAILURE;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		pr_perror("Failed to fork child");
+		return EXIT_FAILURE;
+	}
+
+	if (pid == 0) {
+		/* Close read end */
+		close(pipefd[0]);
+		exit(child(pipefd[1]));
+	}
+
+	/* Close write end in parent */
+	close(pipefd[1]);
+
+	/* Wait for child to set up socket */
+	if (read(pipefd[0], &buf, 1) != 1) {
+		pr_err("Failed to receive readiness signal from child\n");
+		close(pipefd[0]);
+		return EXIT_FAILURE;
+	}
+	close(pipefd[0]);
+
+	*cpid = pid;
+	return EXIT_SUCCESS;
+}
+
+int main(int argc, char **argv)
+{
+	int ret = EXIT_FAILURE, status;
+	pid_t pid;
+	int *cpid;
+
+	test_init(argc, argv);
+
+	cpid = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+	if (cpid == MAP_FAILED) {
+		pr_perror("mmap");
+		return EXIT_FAILURE;
+	}
+	*cpid = 0;
+
+	pid = fork();
+	if (pid < 0) {
+		pr_perror("Failed to fork zombie");
+		goto out;
+	}
+
+	if (pid == 0)
+		exit(zombie_leader(cpid));
+
+	if (waitpid(pid, &status, 0) < 0) {
+		pr_perror("Failed to waitpid zombie");
+		goto out;
+	}
+
+	if (!WIFEXITED(status) || WEXITSTATUS(status) != EXIT_SUCCESS) {
+		pr_err("Unexpected exit code: %d\n", WEXITSTATUS(status));
+		goto out;
+	}
+
+	if (!*cpid) {
+		pr_err("Don't know grandchild's pid\n");
+		goto out;
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	ret = EXIT_SUCCESS;
+	pass();
+out:
+	/* Clean up */
+	if (*cpid)
+		kill(*cpid, SIGKILL);
+
+	munmap(cpid, sizeof(int));
+
+	return ret;
+}
diff --git a/test/zdtm/static/sk-unix-restore-fs-share.desc b/test/zdtm/static/sk-unix-restore-fs-share.desc
new file mode 100644
index 000000000..6c4afe5f0
--- /dev/null
+++ b/test/zdtm/static/sk-unix-restore-fs-share.desc
@@ -0,0 +1 @@
+{'flavor': 'ns uns'}
diff --git a/test/zdtm/static/sock_ip_opts00.c b/test/zdtm/static/sock_ip_opts00.c
new file mode 100644
index 000000000..cb464365d
--- /dev/null
+++ b/test/zdtm/static/sock_ip_opts00.c
@@ -0,0 +1,114 @@
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/in6.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Check that different ip socket options are restored";
+const char *test_author = "Pavel Tikhomirov <ptikhomirov@virtuozzo.com>";
+
+#ifdef ZDTM_VAL_ZERO
+#define IP_OPT_VAL 0
+#else
+#define IP_OPT_VAL 1
+#endif
+
+struct sk_opt {
+	int level;
+	int opt;
+	int val;
+};
+
+struct sk_opt sk_opts_v4[] = {
+	{ SOL_IP, IP_FREEBIND, IP_OPT_VAL },
+	{ SOL_IP, IP_PKTINFO, IP_OPT_VAL },
+	{ SOL_IP, IP_TTL, 32 },
+	{ SOL_IP, IP_TOS, IPTOS_TOS(IPTOS_THROUGHPUT) },
+};
+
+#ifndef IPV6_FREEBIND
+#define IPV6_FREEBIND 78
+#endif
+
+struct sk_opt sk_opts_v6[] = {
+	{ SOL_IPV6, IPV6_FREEBIND, IP_OPT_VAL },
+	{ SOL_IPV6, IPV6_RECVPKTINFO, IP_OPT_VAL },
+};
+
+struct sk_conf {
+	int domain;
+	int type;
+	int protocol;
+	int sk;
+} sk_confs[] = {
+	{ AF_INET, SOCK_DGRAM, IPPROTO_UDP },
+	{ AF_INET, SOCK_RAW, IPPROTO_UDP },
+	{ AF_INET6, SOCK_DGRAM, IPPROTO_UDP },
+	{ AF_INET6, SOCK_RAW, IPPROTO_UDP },
+};
+
+int main(int argc, char **argv)
+{
+	struct sk_opt *opts;
+	int exit_code = 1;
+	int i, j, val;
+	socklen_t len;
+	int n_opts, n_sk; /* n_sk: number of sockets opened so far */
+
+	test_init(argc, argv);
+
+	for (n_sk = 0; n_sk < ARRAY_SIZE(sk_confs); n_sk++) {
+		sk_confs[n_sk].sk = socket(sk_confs[n_sk].domain, sk_confs[n_sk].type, sk_confs[n_sk].protocol);
+		if (sk_confs[n_sk].sk == -1) {
+			pr_perror("socket(%d,%d,%d) failed", sk_confs[n_sk].domain, sk_confs[n_sk].type,
+				  sk_confs[n_sk].protocol);
+			goto close;
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(sk_confs); i++) {
+		opts = sk_confs[i].domain == AF_INET ? sk_opts_v4 : sk_opts_v6;
+		n_opts = sk_confs[i].domain == AF_INET ? ARRAY_SIZE(sk_opts_v4) : ARRAY_SIZE(sk_opts_v6);
+
+		for (j = 0; j < n_opts; j++) {
+			val = opts[j].val;
+			if (setsockopt(sk_confs[i].sk, opts[j].level, opts[j].opt, &val, sizeof(int)) == -1) {
+				pr_perror("setsockopt(%d, %d) failed", opts[j].level, opts[j].opt);
+				goto close;
+			}
+		}
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	for (i = 0; i < ARRAY_SIZE(sk_confs); i++) {
+		opts = sk_confs[i].domain == AF_INET ? sk_opts_v4 : sk_opts_v6;
+		n_opts = sk_confs[i].domain == AF_INET ? ARRAY_SIZE(sk_opts_v4) : ARRAY_SIZE(sk_opts_v6);
+
+		for (j = 0; j < n_opts; j++) {
+			len = sizeof(int);
+			if (getsockopt(sk_confs[i].sk, opts[j].level, opts[j].opt, &val, &len) == -1) {
+				pr_perror("getsockopt(%d, %d) failed", opts[j].level, opts[j].opt);
+				goto close;
+			}
+
+			if (val != opts[j].val) {
+				fail("Unexpected value socket(%d,%d,%d) opts(%d,%d)", sk_confs[i].domain,
+				     sk_confs[i].type, sk_confs[i].protocol, opts[j].level, opts[j].opt);
+				goto close;
+			}
+		}
+	}
+
+	pass();
+	exit_code = 0;
+close:
+	for (i = 0; i < n_sk; i++) /* unopened slots still hold 0 (stdin) or -1: skip them */
+		close(sk_confs[i].sk);
+	return exit_code;
+}
diff --git a/test/zdtm/static/sock_ip_opts00.desc b/test/zdtm/static/sock_ip_opts00.desc
new file mode 100644
index 000000000..2201f0298
--- /dev/null
+++ b/test/zdtm/static/sock_ip_opts00.desc
@@ -0,0 +1 @@
+{'flags': 'suid', 'feature': 'ipv6_freebind'}
diff --git a/test/zdtm/static/sock_ip_opts01.c b/test/zdtm/static/sock_ip_opts01.c
new file mode 120000
index 000000000..15526f808
--- /dev/null
+++ b/test/zdtm/static/sock_ip_opts01.c
@@ -0,0 +1 @@
+sock_ip_opts00.c
\ No newline at end of file
diff --git a/test/zdtm/static/sock_ip_opts01.desc b/test/zdtm/static/sock_ip_opts01.desc
new file mode 120000
index 000000000..e2c29ca25
--- /dev/null
+++ b/test/zdtm/static/sock_ip_opts01.desc
@@ -0,0 +1 @@
+sock_ip_opts00.desc
\ No newline at end of file
diff --git a/test/zdtm/static/sock_opts00.c b/test/zdtm/static/sock_opts00.c
index 5b4624f6d..854aaa591 100644
--- a/test/zdtm/static/sock_opts00.c
+++ b/test/zdtm/static/sock_opts00.c
@@ -12,21 +12,27 @@ const char *test_author = "Pavel Emelyanov <xemul@parallels.com>";
 #define TEST_PORT 59687
 #define TEST_ADDR INADDR_ANY
 
-#define NOPTS 8
-
 int main(int argc, char **argv)
 {
-	int sock, ret = 0, vname[NOPTS], val[NOPTS], rval, i;
-	socklen_t len = sizeof(int);
+	#define OPT(x) { x, #x }
+	static const struct {
+		int opt;
+		const char *name;
+	} vname[] = {
+		OPT(SO_PRIORITY),
+		OPT(SO_RCVLOWAT),
+		OPT(SO_MARK),
+		OPT(SO_PASSCRED),
+		OPT(SO_PASSSEC),
+		OPT(SO_DONTROUTE),
+		OPT(SO_NO_CHECK),
+		OPT(SO_OOBINLINE),
+	};
+	static const int NOPTS = sizeof(vname) / sizeof(*vname);
+	#undef OPT
 
-	vname[0] = SO_PRIORITY;
-	vname[1] = SO_RCVLOWAT;
-	vname[2] = SO_MARK;
-	vname[3] = SO_PASSCRED;
-	vname[4] = SO_PASSSEC;
-	vname[5] = SO_DONTROUTE;
-	vname[6] = SO_NO_CHECK;
-	vname[7] = SO_OOBINLINE;
+	int sock, usock, sk, ret = 0, val[NOPTS], rval, i;
+	socklen_t len = sizeof(int);
 
 	test_init(argc, argv);
 
@@ -36,30 +42,37 @@ int main(int argc, char **argv)
 		return 1;
 	}
 
+	usock = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (usock < 0) {
+		pr_perror("can't create unix socket");
+		return 1;
+	}
+
 	for (i = 0; i < NOPTS; i++) {
-		ret = getsockopt(sock, SOL_SOCKET, vname[i], &val[i], &len);
+		sk = vname[i].opt == SO_PASSCRED || vname[i].opt == SO_PASSSEC ? usock : sock;
+		ret = getsockopt(sk, SOL_SOCKET, vname[i].opt, &val[i], &len);
 		if (ret) {
-			pr_perror("can't get option %d", i);
+			pr_perror("can't get %s", vname[i].name);
 			return 1;
 		}
 
 		val[i]++;
 
-		ret = setsockopt(sock, SOL_SOCKET, vname[i], &val[i], len);
+		ret = setsockopt(sk, SOL_SOCKET, vname[i].opt, &val[i], len);
 		if (ret) {
-			pr_perror("can't set option %d", i);
+			pr_perror("can't set %s = %d", vname[i].name, val[i]);
 			return 1;
 		}
 
-		ret = getsockopt(sock, SOL_SOCKET, vname[i], &rval, &len);
+		ret = getsockopt(sk, SOL_SOCKET, vname[i].opt, &rval, &len);
 		if (ret) {
-			pr_perror("can't get option %d 2", i);
+			pr_perror("can't re-get %s", vname[i].name);
 			return 1;
 		}
 
 		if (rval != val[i]) {
 			if (rval + 1 == val[i]) {
-				pr_perror("can't reset option %d want %d have %d", i, val[i], rval);
+				pr_perror("failed to set %s: want %d have %d", vname[i].name, val[i], rval);
 				return 1;
 			}
 
@@ -72,20 +85,23 @@ int main(int argc, char **argv)
 	test_waitsig();
 
 	for (i = 0; i < NOPTS; i++) {
-		ret = getsockopt(sock, SOL_SOCKET, vname[i], &rval, &len);
+		sk = vname[i].opt == SO_PASSCRED || vname[i].opt == SO_PASSSEC ? usock : sock;
+		ret = getsockopt(sk, SOL_SOCKET, vname[i].opt, &rval, &len);
 		if (ret) {
-			pr_perror("can't get option %d again", i);
+			pr_perror("can't verify %s", vname[i].name);
 			return 1;
 		}
 
 		if (val[i] != rval) {
-			fail("option %d changed", i);
+			errno = 0;
+			fail("%s changed: %d -> %d", vname[i].name, val[i], rval);
 			return 1;
 		}
 	}
 
 	pass();
 	close(sock);
+	close(usock);
 
 	return 0;
 }
diff --git a/test/zdtm/static/sock_tcp_opts00.c b/test/zdtm/static/sock_tcp_opts00.c
new file mode 100644
index 000000000..8061bc9ea
--- /dev/null
+++ b/test/zdtm/static/sock_tcp_opts00.c
@@ -0,0 +1,96 @@
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Check that different tcp socket options are restored";
+const char *test_author = "Juntong Deng <juntong.deng@outlook.com>";
+
+#ifdef ZDTM_VAL_ZERO
+#define TCP_OPT_VAL 0
+#else
+#define TCP_OPT_VAL 1
+#endif
+
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
+struct sk_opt {
+	int level;
+	int opt;
+	int val;
+};
+
+struct sk_opt tcp_sk_opts[] = {
+	{ SOL_TCP, TCP_CORK, TCP_OPT_VAL },
+	{ SOL_TCP, TCP_NODELAY, TCP_OPT_VAL },
+};
+
+struct sk_conf {
+	int domain;
+	int type;
+	int protocol;
+	int sk;
+} sk_confs[] = {
+	{ AF_INET, SOCK_STREAM, IPPROTO_TCP },
+	{ AF_INET6, SOCK_STREAM, IPPROTO_TCP },
+};
+
+int main(int argc, char **argv)
+{
+	struct sk_opt *opts = tcp_sk_opts;
+	int n_opts = ARRAY_SIZE(tcp_sk_opts);
+	int exit_code = 1;
+	int i, j, val, n_sk; /* n_sk: number of sockets opened so far */
+	socklen_t len;
+
+	test_init(argc, argv);
+
+	for (n_sk = 0; n_sk < ARRAY_SIZE(sk_confs); n_sk++) {
+		sk_confs[n_sk].sk = socket(sk_confs[n_sk].domain, sk_confs[n_sk].type, sk_confs[n_sk].protocol);
+		if (sk_confs[n_sk].sk == -1) {
+			pr_perror("socket(%d,%d,%d) failed", sk_confs[n_sk].domain, sk_confs[n_sk].type,
+				  sk_confs[n_sk].protocol);
+			goto close;
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(sk_confs); i++) {
+		for (j = 0; j < n_opts; j++) {
+			val = opts[j].val;
+			if (setsockopt(sk_confs[i].sk, opts[j].level, opts[j].opt, &val, sizeof(int)) == -1) {
+				pr_perror("setsockopt(%d, %d) failed", opts[j].level, opts[j].opt);
+				goto close;
+			}
+		}
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	for (i = 0; i < ARRAY_SIZE(sk_confs); i++) {
+		for (j = 0; j < n_opts; j++) {
+			len = sizeof(int);
+			if (getsockopt(sk_confs[i].sk, opts[j].level, opts[j].opt, &val, &len) == -1) {
+				pr_perror("getsockopt(%d, %d) failed", opts[j].level, opts[j].opt);
+				goto close;
+			}
+
+			if (val != opts[j].val) {
+				fail("Unexpected value socket(%d,%d,%d) opts(%d,%d)", sk_confs[i].domain,
+				     sk_confs[i].type, sk_confs[i].protocol, opts[j].level, opts[j].opt);
+				goto close;
+			}
+		}
+	}
+
+	pass();
+	exit_code = 0;
+close:
+	for (i = 0; i < n_sk; i++) /* unopened slots still hold 0 (stdin) or -1: skip them */
+		close(sk_confs[i].sk);
+	return exit_code;
+}
diff --git a/test/zdtm/static/sock_tcp_opts00.desc b/test/zdtm/static/sock_tcp_opts00.desc
new file mode 100644
index 000000000..2eac7e654
--- /dev/null
+++ b/test/zdtm/static/sock_tcp_opts00.desc
@@ -0,0 +1 @@
+{'flags': 'suid'}
diff --git a/test/zdtm/static/sock_tcp_opts01.c b/test/zdtm/static/sock_tcp_opts01.c
new file mode 120000
index 000000000..5219c2e98
--- /dev/null
+++ b/test/zdtm/static/sock_tcp_opts01.c
@@ -0,0 +1 @@
+./sock_tcp_opts00.c
\ No newline at end of file
diff --git a/test/zdtm/static/sock_tcp_opts01.desc b/test/zdtm/static/sock_tcp_opts01.desc
new file mode 120000
index 000000000..fb1dfdcd1
--- /dev/null
+++ b/test/zdtm/static/sock_tcp_opts01.desc
@@ -0,0 +1 @@
+./sock_tcp_opts00.desc
\ No newline at end of file
diff --git a/test/zdtm/static/socket-tcp-closed-last-ack.desc b/test/zdtm/static/socket-tcp-closed-last-ack.desc
index d4cfe5064..c77d58477 100644
--- a/test/zdtm/static/socket-tcp-closed-last-ack.desc
+++ b/test/zdtm/static/socket-tcp-closed-last-ack.desc
@@ -1,10 +1,10 @@
 {   'deps': [   '/bin/sh',
                 '/sbin/iptables|/usr/sbin/iptables',
-                '/lib/xtables/libxt_tcp.so|/usr/lib64/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/s390x-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_tcp.so',
-                '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so',
+                '/lib/xtables/libxt_tcp.so|/usr/lib64/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/s390x-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_tcp.so',
+                '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_standard.so',
 	],
 	'opts': '--tcp-established',
 	'flags': 'suid nouser samens',
-	'feature' : 'tcp_half_closed',
+	'feature' : 'tcp_half_closed has_ipt_legacy',
 	'flavor': 'ns uns',
 }
diff --git a/test/zdtm/static/socket-tcp-closing.c b/test/zdtm/static/socket-tcp-closing.c
index 87e1d7533..df291d446 100644
--- a/test/zdtm/static/socket-tcp-closing.c
+++ b/test/zdtm/static/socket-tcp-closing.c
@@ -31,10 +31,13 @@ static int port = 8880;
 
 int fill_sock_buf(int fd)
 {
+	char zdtm[512];
 	int flags;
 	int size;
 	int ret;
 
+	memset(zdtm, 5, sizeof(zdtm));
+
 	flags = fcntl(fd, F_GETFL, 0);
 	if (flags == -1) {
 		pr_perror("Can't get flags");
@@ -47,7 +50,6 @@ int fill_sock_buf(int fd)
 
 	size = 0;
 	while (1) {
-		char zdtm[] = "zdtm test packet";
 		ret = write(fd, zdtm, sizeof(zdtm));
 		if (ret == -1) {
 			if (errno == EAGAIN)
diff --git a/test/zdtm/static/socket-tcp-fin-wait1.hook b/test/zdtm/static/socket-tcp-fin-wait1.hook
index 9504557da..30f8ce071 100755
--- a/test/zdtm/static/socket-tcp-fin-wait1.hook
+++ b/test/zdtm/static/socket-tcp-fin-wait1.hook
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 import sys
 
-sys.path.append("../crit")
+sys.path.append("../lib")
 
 import pycriu
 import os, os.path
diff --git a/test/zdtm/static/socket-tcp-nfconntrack.c b/test/zdtm/static/socket-tcp-ipt-nfconntrack.c
similarity index 100%
rename from test/zdtm/static/socket-tcp-nfconntrack.c
rename to test/zdtm/static/socket-tcp-ipt-nfconntrack.c
diff --git a/test/zdtm/static/socket-tcp-ipt-nfconntrack.desc b/test/zdtm/static/socket-tcp-ipt-nfconntrack.desc
new file mode 100644
index 000000000..53dd82285
--- /dev/null
+++ b/test/zdtm/static/socket-tcp-ipt-nfconntrack.desc
@@ -0,0 +1,6 @@
+{
+    'feature': 'has_ipt_legacy',
+    'flavor': 'h',
+    'opts': '--tcp-established',
+    'flags': 'suid'
+}
diff --git a/test/zdtm/static/socket-tcp-nfconntrack.desc b/test/zdtm/static/socket-tcp-nfconntrack.desc
deleted file mode 100644
index add2513f8..000000000
--- a/test/zdtm/static/socket-tcp-nfconntrack.desc
+++ /dev/null
@@ -1 +0,0 @@
-{'flavor': 'h', 'opts': '--tcp-established', 'flags': 'suid'}
diff --git a/test/zdtm/static/socket-tcp-nft-nfconntrack.c b/test/zdtm/static/socket-tcp-nft-nfconntrack.c
new file mode 120000
index 000000000..8cb60dd03
--- /dev/null
+++ b/test/zdtm/static/socket-tcp-nft-nfconntrack.c
@@ -0,0 +1 @@
+socket-tcp.c
\ No newline at end of file
diff --git a/test/zdtm/static/socket-tcp-nft-nfconntrack.desc b/test/zdtm/static/socket-tcp-nft-nfconntrack.desc
new file mode 100644
index 000000000..38a4eb389
--- /dev/null
+++ b/test/zdtm/static/socket-tcp-nft-nfconntrack.desc
@@ -0,0 +1,7 @@
+{
+    'flavor': 'h',
+    'feature': 'network_lock_nftables',
+    'opts': '--tcp-established',
+    'dopts': '--network-lock nftables',
+    'flags': 'suid'
+}
diff --git a/test/zdtm/static/socket-tcp-reseted.desc b/test/zdtm/static/socket-tcp-reseted.desc
index 3ebdfeef8..ff92e9f9f 100644
--- a/test/zdtm/static/socket-tcp-reseted.desc
+++ b/test/zdtm/static/socket-tcp-reseted.desc
@@ -1,10 +1,10 @@
 {   'deps': [   '/bin/sh',
                 '/sbin/iptables|/usr/sbin/iptables',
-                '/lib/xtables/libxt_tcp.so|/usr/lib64/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so|/usr/lib/s390x-linux-gnu/xtables/libxt_tcp.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_tcp.so',
-                '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so',
-		  '/lib/xtables/libipt_REJECT.so|/usr/lib64/xtables/libipt_REJECT.so|/usr/lib/powerpc64le-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/x86_64-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/xtables/libipt_REJECT.so|/usr/lib/s390x-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/aarch64-linux-gnu/xtables/libipt_REJECT.so',
+                '/lib/xtables/libxt_tcp.so|/usr/lib64/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so|/usr/lib/s390x-linux-gnu/xtables/libxt_tcp.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_tcp.so',
+                '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_standard.so',
+		  '/lib/xtables/libipt_REJECT.so|/usr/lib64/xtables/libipt_REJECT.so|/usr/lib/powerpc64le-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/x86_64-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/xtables/libipt_REJECT.so|/usr/lib/s390x-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/aarch64-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/riscv64-linux-gnu/xtables/libipt_REJECT.so',
        ],
        'opts': '--tcp-established',
        'flags': 'suid nouser samens',
-       'feature' : 'tcp_half_closed'
+       'feature' : 'tcp_half_closed has_ipt_legacy'
 }
diff --git a/test/zdtm/static/socket-tcp-syn-sent.desc b/test/zdtm/static/socket-tcp-syn-sent.desc
index 4cc23c8fc..52382414b 100644
--- a/test/zdtm/static/socket-tcp-syn-sent.desc
+++ b/test/zdtm/static/socket-tcp-syn-sent.desc
@@ -1,9 +1,9 @@
 {   'deps': [   '/bin/sh',
                 '/sbin/iptables|/usr/sbin/iptables',
-                '/lib/xtables/libxt_tcp.so|/usr/lib64/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so|/usr/lib/s390x-linux-gnu/xtables/libxt_tcp.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_tcp.so',
-                '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so',
+                '/lib/xtables/libxt_tcp.so|/usr/lib64/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so|/usr/lib/s390x-linux-gnu/xtables/libxt_tcp.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_tcp.so',
+                '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_standard.so',
 	],
 	'opts': '--tcp-established',
 	'flags': 'suid nouser samens',
-	'feature' : 'tcp_half_closed'
+	'feature' : 'tcp_half_closed has_ipt_legacy'
 }
diff --git a/test/zdtm/static/socket-tcp.c b/test/zdtm/static/socket-tcp.c
index f6ef47385..bc2075496 100644
--- a/test/zdtm/static/socket-tcp.c
+++ b/test/zdtm/static/socket-tcp.c
@@ -67,17 +67,38 @@ int main(int argc, char **argv)
 	int val;
 	socklen_t optlen;
 
-#ifdef ZDTM_CONNTRACK
+#ifdef ZDTM_IPT_CONNTRACK
 	if (unshare(CLONE_NEWNET)) {
 		pr_perror("unshare");
 		return 1;
 	}
 	if (system("ip link set up dev lo"))
 		return 1;
-	if (system("iptables -w -A INPUT -i lo -p tcp -m state --state NEW,ESTABLISHED -j ACCEPT"))
+
+	if (system("iptables-legacy -w -A INPUT -i lo -p tcp -m state --state NEW,ESTABLISHED -j ACCEPT"))
 		return 1;
-	if (system("iptables -w -A INPUT -j DROP"))
+	if (system("iptables-legacy -w -A INPUT -j DROP"))
 		return 1;
+
+#endif
+
+#ifdef ZDTM_NFT_CONNTRACK
+	if (unshare(CLONE_NEWNET)) {
+		pr_perror("unshare");
+		return 1;
+	}
+	if (system("ip link set up dev lo"))
+		return 1;
+
+	if (system("nft add table ip filter"))
+		return 1;
+	if (system("nft 'add chain ip filter INPUT { type filter hook input priority 0 ; }'"))
+		return 1;
+	if (system("nft add rule ip filter INPUT iifname \"lo\" ip protocol tcp ct state new,established counter accept"))
+		return 1;
+	if (system("nft add rule ip filter INPUT counter drop"))
+		return 1;
+
 #endif
 
 #ifdef ZDTM_TCP_LOCAL
diff --git a/test/zdtm/static/socket6_icmp.c b/test/zdtm/static/socket6_icmp.c
new file mode 120000
index 000000000..24d8fd806
--- /dev/null
+++ b/test/zdtm/static/socket6_icmp.c
@@ -0,0 +1 @@
+socket_icmp.c
\ No newline at end of file
diff --git a/test/zdtm/static/socket_icmp.c b/test/zdtm/static/socket_icmp.c
new file mode 100644
index 000000000..f72e348bf
--- /dev/null
+++ b/test/zdtm/static/socket_icmp.c
@@ -0,0 +1,128 @@
+#include "zdtmtst.h"
+
+const char *test_doc = "static test for ICMP socket\n";
+const char *test_author = "समीर सिंह Sameer Singh <lumarzeli30@gmail.com>\n";
+
+/* Description:
+ * Send a ping to localhost using ICMP socket
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#if defined(ZDTM_IPV6)
+#include <netinet/icmp6.h>
+#else
+#include <netinet/ip_icmp.h>
+#endif
+#include <arpa/inet.h>
+#include <sys/time.h>
+#include <netdb.h>
+
+#include "sysctl.h"
+
+#define PACKET_SIZE  64
+#define RECV_TIMEOUT 1
+
+static int echo_id = 1234;
+
+#if defined(ZDTM_IPV6)
+#define TEST_ICMP_ECHOREPLY ICMP6_ECHO_REPLY
+#else
+#define TEST_ICMP_ECHOREPLY ICMP_ECHOREPLY
+#endif
+int main(int argc, char **argv)
+{
+	int ret, sock, seq = 0;
+	char packet[PACKET_SIZE], recv_packet[PACKET_SIZE];
+
+	struct timeval tv;
+#if defined(ZDTM_IPV6)
+	struct sockaddr_in6 addr, recv_addr;
+	struct icmp6_hdr icmp_header;
+#else
+	struct sockaddr_in addr, recv_addr;
+	struct icmphdr icmp_header;
+#endif
+	socklen_t addr_len;
+
+	// Allow GIDs 0-58468 to open an unprivileged ICMP socket
+	if (sysctl_write_str("/proc/sys/net/ipv4/ping_group_range", "0 58468"))
+		return -1;
+
+	test_init(argc, argv);
+
+#if defined(ZDTM_IPV6)
+	sock = socket(PF_INET6, SOCK_DGRAM, IPPROTO_ICMPV6);
+#else
+	sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_ICMP);
+#endif
+	if (sock < 0) {
+		pr_perror("Can't create socket");
+		return 1;
+	}
+
+	tv.tv_sec = RECV_TIMEOUT;
+	tv.tv_usec = 0;
+	if (setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0) {
+		pr_perror("Can't set socket option");
+		return 1;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	memset(&icmp_header, 0, sizeof(icmp_header));
+#if defined(ZDTM_IPV6)
+	addr.sin6_family = AF_INET6;
+	inet_pton(AF_INET6, "::1", &addr.sin6_addr);
+
+	icmp_header.icmp6_type = ICMP6_ECHO_REQUEST;
+	icmp_header.icmp6_code = 0;
+	icmp_header.icmp6_id = echo_id;
+	icmp_header.icmp6_seq = seq;
+#else
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+	icmp_header.type = ICMP_ECHO;
+	icmp_header.code = 0;
+	icmp_header.un.echo.id = echo_id;
+	icmp_header.un.echo.sequence = seq;
+#endif
+
+	memcpy(packet, &icmp_header, sizeof(icmp_header));
+	memset(packet + sizeof(icmp_header), 0xa5,
+	       PACKET_SIZE - sizeof(icmp_header));
+
+	test_daemon();
+	test_waitsig();
+
+	ret = sendto(sock, packet, PACKET_SIZE, 0,
+		     (struct sockaddr *)&addr, sizeof(addr));
+
+	if (ret < 0) {
+		fail("Can't send");
+		return 1;
+	}
+
+	addr_len = sizeof(recv_addr);
+
+	ret = recvfrom(sock, recv_packet, sizeof(recv_packet), 0,
+		       (struct sockaddr *)&recv_addr, &addr_len);
+
+	if (ret < 0) {
+		fail("Can't recv");
+		return 1;
+	}
+
+	/* The datagram starts at the ICMP header, whose first octet is the type */
+	if ((unsigned char)recv_packet[0] != TEST_ICMP_ECHOREPLY) {
+		fail("Got no ICMP_ECHO_REPLY");
+		return 1;
+	}
+
+	close(sock);
+
+	pass();
+	return 0;
+}
diff --git a/test/zdtm/static/socket_udp_shutdown.c b/test/zdtm/static/socket_udp_shutdown.c
index 91dc8f30a..a7658b9dd 100644
--- a/test/zdtm/static/socket_udp_shutdown.c
+++ b/test/zdtm/static/socket_udp_shutdown.c
@@ -28,8 +28,8 @@ int main(int argc, char **argv)
 
 	test_init(argc, argv);
 
-	sk1 = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
-	sk2 = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+	sk1 = socket(PF_INET, SOCK_DGRAM | SOCK_NONBLOCK, IPPROTO_UDP);
+	sk2 = socket(PF_INET, SOCK_DGRAM | SOCK_NONBLOCK, IPPROTO_UDP);
 	if (sk1 < 0 || sk2 < 0) {
 		pr_perror("Can't create socket");
 		exit(1);
diff --git a/test/zdtm/static/stopped.c b/test/zdtm/static/stopped.c
index 059a2a92a..26b0174ed 100644
--- a/test/zdtm/static/stopped.c
+++ b/test/zdtm/static/stopped.c
@@ -65,7 +65,7 @@ int main(int argc, char **argv)
 	}
 
 	if (WIFSTOPPED(status))
-		test_msg("The procces stopped\n");
+		test_msg("The process stopped\n");
 	else {
 		fail("The process doesn't stopped");
 		goto out;
diff --git a/test/zdtm/static/stopped03.c b/test/zdtm/static/stopped03.c
new file mode 100644
index 000000000..9a373930f
--- /dev/null
+++ b/test/zdtm/static/stopped03.c
@@ -0,0 +1,161 @@
+#include <signal.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+
+#include "zdtmtst.h"
+#include "lock.h"
+
+const char *test_doc = "Check, that stopped by SIGTSTP tasks are restored correctly";
+const char *test_author = "Yuriy Vasiliev <yuriy.vasiliev@openvz.org>";
+
+#define STOP_SIGNO SIGTSTP
+const char *stop_sigstr = "SIGTSTP";
+enum {
+	FUTEX_INITIALIZED = 0,
+	TEST_CRIU,
+	TEST_CHECK,
+	TEST_DONE,
+	TEST_EXIT,
+	TEST_EMERGENCY_ABORT,
+};
+
+struct shared {
+	futex_t fstate;
+	int status;
+	int code;
+} *sh;
+
+static int new_pgrp(void)
+{
+	siginfo_t infop;
+	int ret = 1;
+	pid_t pid;
+
+	/*
+	 * Set the PGID to avoid creating an orphaned process group,
+	 * which is not to be affected by terminal-generated stop signals.
+	 */
+	setpgid(0, 0);
+
+	pid = test_fork();
+	if (pid < 0)
+		goto err_cr;
+
+	if (pid == 0) {
+		/* wait for TEST_EXIT or TEST_EMERGENCY_ABORT*/
+		futex_wait_while_lt(&sh->fstate, TEST_EXIT);
+		exit(0);
+	}
+
+	if (kill(pid, STOP_SIGNO)) {
+		pr_perror("Unable to send %s", stop_sigstr);
+		goto err_cr;
+	}
+
+	if (waitid(P_PID, pid, &infop, WNOWAIT | WSTOPPED) < 0) {
+		pr_perror("Unable to waitid %d", pid);
+		goto err_cont;
+	}
+
+	sh->code = infop.si_code;
+	sh->status = infop.si_status;
+
+	/* Return the control back to MAIN worker to do C/R */
+	futex_set_and_wake(&sh->fstate, TEST_CRIU);
+	futex_wait_while_lt(&sh->fstate, TEST_CHECK);
+
+	infop.si_code = 0;
+	infop.si_status = 0;
+
+	if (waitid(P_PID, pid, &infop, WNOWAIT | WSTOPPED) < 0) {
+		pr_perror("Unable to waitid %d", pid);
+		goto err_cont;
+	}
+
+	sh->code = infop.si_code;
+	sh->status = infop.si_status;
+
+	futex_set_and_wake(&sh->fstate, TEST_DONE);
+	futex_wait_while_lt(&sh->fstate, TEST_EXIT);
+
+	ret = 0;
+err_cont:
+	kill(pid, SIGCONT);
+err_cr:
+	if (ret)
+		futex_set_and_wake(&sh->fstate, TEST_EMERGENCY_ABORT);
+	if (pid > 0)
+		wait(NULL);
+
+	return ret;
+}
+
+int main(int argc, char **argv)
+{
+	int fail = 0;
+	pid_t pid;
+
+	test_init(argc, argv);
+
+	sh = mmap(NULL, sizeof(struct shared), PROT_WRITE | PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (sh == MAP_FAILED) {
+		pr_perror("Failed to alloc shared region");
+		return 1;
+	}
+
+	futex_set(&sh->fstate, FUTEX_INITIALIZED);
+
+	pid = test_fork();
+	if (pid < 0) {
+		fail = 1;
+		goto out;
+	}
+
+	if (pid == 0)
+		exit(new_pgrp());
+
+	/* Wait until pgrp is ready to C/R (or has given up with TEST_EMERGENCY_ABORT) */
+	futex_wait_while_lt(&sh->fstate, TEST_CRIU);
+	if (futex_get(&sh->fstate) == TEST_EMERGENCY_ABORT) {
+		pr_err("Fail in child worker before C/R\n");
+		fail = 1;
+		goto out;
+	}
+
+	if (sh->code != CLD_STOPPED || sh->status != STOP_SIGNO) {
+		pr_err("Process is not in correct state before C/R."
+		       " Expected stop signo: %d. Get stop signo: %d\n",
+		       STOP_SIGNO, sh->status);
+		fail = 1;
+		goto out;
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	futex_set_and_wake(&sh->fstate, TEST_CHECK);
+	futex_wait_while_lt(&sh->fstate, TEST_DONE);
+	if (futex_get(&sh->fstate) == TEST_EMERGENCY_ABORT) {
+		pr_err("Fail in child worker after C/R\n");
+		fail = 1;
+		goto out;
+	}
+
+	if (sh->code != CLD_STOPPED || sh->status != STOP_SIGNO) {
+		fail = 1;
+		pr_err("Process is not in correct state after C/R."
+		       " Expected stop signo: %d. Get stop signo: %d\n",
+		       STOP_SIGNO, sh->status);
+	} else {
+		pass();
+	}
+
+	futex_set_and_wake(&sh->fstate, TEST_EXIT);
+out:
+	if (pid > 0)
+		wait(NULL);
+
+	munmap(sh, sizeof(struct shared));
+
+	return fail;
+}
diff --git a/test/zdtm/static/stopped04.c b/test/zdtm/static/stopped04.c
new file mode 100644
index 000000000..9bd968aa2
--- /dev/null
+++ b/test/zdtm/static/stopped04.c
@@ -0,0 +1,135 @@
+#include <signal.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+
+#include "zdtmtst.h"
+#include "lock.h"
+
+const char *test_doc = "Check, that stopped by SIGTSTP tasks are restored correctly";
+const char *test_author = "Yuriy Vasiliev <yuriy.vasiliev@openvz.org>";
+
+const char *stop_sigstr = "SIGTSTP";
+enum {
+	FUTEX_INITIALIZED = 0,
+	TEST_CRIU,
+	TEST_DONE,
+	TEST_EXIT,
+	TEST_EMERGENCY_ABORT,
+};
+
+struct shared {
+	futex_t fstate;
+	int status;
+	int code;
+} *sh;
+
+static int new_pgrp(void)
+{
+	sigset_t sigset;
+	siginfo_t infop;
+	int ret = 1;
+	pid_t pid;
+
+	/*
+	 * Set the PGID to avoid creating an orphaned process group,
+	 * which is not to be affected by terminal-generated stop signals.
+	 */
+	setpgid(0, 0);
+
+	sigemptyset(&sigset);
+	sigaddset(&sigset, SIGTSTP);
+	sigprocmask(SIG_BLOCK, &sigset, NULL);
+
+	pid = test_fork();
+	if (pid < 0)
+		goto err_cr;
+
+	if (pid == 0) {
+		/* wait for TEST_EXIT or TEST_EMERGENCY_ABORT */
+		futex_wait_while_lt(&sh->fstate, TEST_EXIT);
+		exit(0);
+	}
+
+	if (kill(pid, SIGSTOP)) {
+		pr_perror("Unable to send SIGSTOP");
+		goto err_cr;
+	}
+
+	if (waitid(P_PID, pid, &infop, WNOWAIT | WSTOPPED) < 0) {
+		pr_perror("Unable to waitid %d", pid);
+		goto err_cont;
+	}
+
+	if (kill(pid, SIGTSTP)) {
+		pr_perror("Unable to send %s", stop_sigstr);
+		goto err_cont; /* child is stopped: resume it or wait() below never returns */
+	}
+
+	/* Return the control back to MAIN worker to do C/R */
+	futex_set_and_wake(&sh->fstate, TEST_CRIU);
+	futex_wait_while_lt(&sh->fstate, TEST_EXIT);
+
+	ret = 0;
+err_cont:
+	kill(pid, SIGCONT);
+err_cr:
+	if (ret)
+		futex_set_and_wake(&sh->fstate, TEST_EMERGENCY_ABORT);
+	if (pid > 0)
+		wait(NULL);
+
+	return ret;
+}
+
+int main(int argc, char **argv)
+{
+	int fail = 0;
+	pid_t pid;
+
+	test_init(argc, argv);
+
+	sh = mmap(NULL, sizeof(struct shared), PROT_WRITE | PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (sh == MAP_FAILED) {
+		pr_perror("Failed to alloc shared region");
+		return 1;
+	}
+
+	futex_set(&sh->fstate, FUTEX_INITIALIZED);
+
+	pid = test_fork();
+	if (pid < 0) {
+		fail = 1;
+		goto out;
+	}
+
+	if (pid == 0)
+		exit(new_pgrp());
+
+	/* Wait until pgrp is ready to C/R (or has given up with TEST_EMERGENCY_ABORT) */
+	futex_wait_while_lt(&sh->fstate, TEST_CRIU);
+	if (futex_get(&sh->fstate) == TEST_EMERGENCY_ABORT) {
+		pr_err("Fail in child worker before C/R\n");
+		fail = 1;
+		goto out;
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	if (futex_get(&sh->fstate) == TEST_EMERGENCY_ABORT) {
+		pr_err("Fail in child worker after C/R\n");
+		fail = 1;
+		goto out;
+	}
+
+	pass();
+
+	futex_set_and_wake(&sh->fstate, TEST_EXIT);
+out:
+	if (pid > 0)
+		wait(NULL);
+
+	munmap(sh, sizeof(struct shared));
+
+	return fail;
+}
diff --git a/test/zdtm/static/tempfs_subns.c b/test/zdtm/static/tempfs_subns.c
index ed3ef9a3a..490fdad6e 100644
--- a/test/zdtm/static/tempfs_subns.c
+++ b/test/zdtm/static/tempfs_subns.c
@@ -20,7 +20,7 @@ int main(int argc, char **argv)
 {
 	int fds[2], i;
 	pid_t pid;
-	int fd, status;
+	int status, fd = -1;
 
 	test_init(argc, argv);
 
diff --git a/test/zdtm/static/thp_disable.c b/test/zdtm/static/thp_disable.c
index ab88120c2..55609f260 100644
--- a/test/zdtm/static/thp_disable.c
+++ b/test/zdtm/static/thp_disable.c
@@ -17,6 +17,7 @@ int main(int argc, char **argv)
 	unsigned long orig_flags = 0, new_flags = 0;
 	unsigned long orig_madv = 0, new_madv = 0;
 	void *area;
+	int ret;
 
 	test_init(argc, argv);
 
@@ -35,9 +36,46 @@ int main(int argc, char **argv)
 		return -1;
 	}
 
+	ret = prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0);
+	if (ret < 0) {
+		pr_perror("Getting THP-disabled flag failed");
+		return -1;
+	}
+	if (ret != 1) {
+		errno = 0;
+		fail("prctl(GET_THP_DISABLE) returned unexpected value: %d != 1", ret);
+		return -1;
+	}
+
+	test_msg("Fetch pre-migration flags/adv\n");
+	if (get_smaps_bits((unsigned long)area, &new_flags, &new_madv))
+		return -1;
+
+	errno = 0;
+	if (orig_flags != new_flags) {
+		fail("Flags changed %lx -> %lx", orig_flags, new_flags);
+		return -1;
+	}
+
+	if (orig_madv != new_madv) {
+		fail("Madvs changed %lx -> %lx", orig_madv, new_madv);
+		return -1;
+	}
+
 	test_daemon();
 	test_waitsig();
 
+	ret = prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0);
+	if (ret < 0) {
+		pr_perror("Getting post-migration THP-disabled flag failed");
+		return -1;
+	}
+	if (ret != 1) {
+		errno = 0;
+		fail("post-migration prctl(GET_THP_DISABLE) returned unexpected value: %d != 1", ret);
+		return -1;
+	}
+
 	if (prctl(PR_SET_THP_DISABLE, 0, 0, 0, 0)) {
 		pr_perror("Enabling THP failed");
 		return -1;
@@ -47,15 +85,14 @@ int main(int argc, char **argv)
 	if (get_smaps_bits((unsigned long)area, &new_flags, &new_madv))
 		return -1;
 
+	errno = 0;
 	if (orig_flags != new_flags) {
-		pr_err("Flags are changed %lx -> %lx\n", orig_flags, new_flags);
-		fail();
+		fail("Flags changed %lx -> %lx", orig_flags, new_flags);
 		return -1;
 	}
 
 	if (orig_madv != new_madv) {
-		pr_err("Madvs are changed %lx -> %lx\n", orig_madv, new_madv);
-		fail();
+		fail("Madvs changed %lx -> %lx", orig_madv, new_madv);
 		return -1;
 	}
 
diff --git a/test/zdtm/static/thread_different_uid_gid.c b/test/zdtm/static/thread_different_uid_gid.c
index 3a0b6291b..88f99659b 100644
--- a/test/zdtm/static/thread_different_uid_gid.c
+++ b/test/zdtm/static/thread_different_uid_gid.c
@@ -130,7 +130,7 @@ int main(int argc, char **argv)
 	ret = syscall(SYS_setresgid, maingroup, maingroup, maingroup);
 	if (ret >= 0) {
 		ret = syscall(SYS_setresuid, mainuser, mainuser, mainuser);
-	} else if (ret < 0) {
+	} else {
 		pr_perror("Failed to drop privileges");
 		exit(1);
 	}
diff --git a/test/zdtm/static/timers01.c b/test/zdtm/static/timers01.c
new file mode 100644
index 000000000..10ecc3481
--- /dev/null
+++ b/test/zdtm/static/timers01.c
@@ -0,0 +1,74 @@
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/time.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Checks non-periodic timers\n";
+const char *test_author = "Andrei Vagin <avagin@gmail.com>";
+
+static struct {
+	const int timer_type;
+	const int signal;
+	volatile sig_atomic_t count;
+} timer_tests[] = {
+	/* from slowest to fastest */
+	{ ITIMER_VIRTUAL, SIGVTALRM },
+	{ ITIMER_PROF, SIGPROF },
+	{ ITIMER_REAL, SIGALRM },
+};
+
+#define NUM_TIMERS (sizeof(timer_tests) / sizeof(timer_tests[0]))
+#define TIMER_TIMEOUT 3600
+#define TIMER_ALLOWED_DELTA 300
+
+static void setup_timers(void)
+{
+	int i;
+	struct itimerval tv = {
+		.it_interval = { .tv_sec = 0, .tv_usec = 0 },
+		.it_value = { .tv_sec = TIMER_TIMEOUT, .tv_usec = 0 },
+	};
+
+	for (i = 0; i < NUM_TIMERS; i++) {
+		if (setitimer(timer_tests[i].timer_type, &tv, NULL) < 0) {
+			pr_perror("can't set timer %d", i);
+			exit(1);
+		}
+	}
+}
+
+static void check_timers(void)
+{
+	int i;
+
+	for (i = 0; i < NUM_TIMERS; i++) {
+		struct itimerval tv = {};
+
+		if (getitimer(timer_tests[i].timer_type, &tv)) {
+			pr_perror("getitimer");
+			exit(1);
+		}
+		if (tv.it_value.tv_sec > TIMER_TIMEOUT ||
+		    tv.it_value.tv_sec < TIMER_TIMEOUT - TIMER_ALLOWED_DELTA) {
+			fail("%ld isn't in [%d, %d]", (long)tv.it_value.tv_sec,
+					TIMER_TIMEOUT - TIMER_ALLOWED_DELTA, /* lower bound */
+					TIMER_TIMEOUT); /* upper bound */
+			exit(1);
+		}
+	}
+	pass();
+}
+
+int main(int argc, char **argv)
+{
+	test_init(argc, argv);
+
+	setup_timers();
+
+	test_daemon();
+	test_waitsig();
+
+	check_timers();
+	return 0;
+}
diff --git a/test/zdtm/static/uprobes.c b/test/zdtm/static/uprobes.c
new file mode 100644
index 000000000..6ef9a56bc
--- /dev/null
+++ b/test/zdtm/static/uprobes.c
@@ -0,0 +1,295 @@
+#include <fcntl.h>
+#include <gelf.h>
+#include <libelf.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <tracefs.h>
+#include <unistd.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Test the --allow-uprobes option";
+const char *test_author = "Shashank Balaji <shashank.mahadasyam@sony.com>";
+
+#define UPROBE_GROUP_NAME	"zdtm"
+#define UPROBE_EVENT_NAME	"uprobes_test"
+#define UPROBED_FUNCTION	uprobe_target
+
+/*
+ * A uprobe can be set at the start of a function, but not all instructions
+ * will trigger the creation of a uprobes vma.
+ *
+ * Examples:
+ * - aarch64: if the function is a single `ret`, then no vma creation
+ * - x64: if the function is `nop; ret`, then no vma creation
+ *
+ * So to guarantee vma creation, create a volatile dummy variable (to prevent
+ * compiler optimization) and use it (to prevent "unused variable" warning)
+ */
+void UPROBED_FUNCTION(void) {
+	volatile int dummy __maybe_unused = 0;
+	dummy += 1;
+}
+/* Calling via volatile function pointer ensures noinline at callsite */
+typedef void (*func_ptr)(void);
+volatile func_ptr uprobe_target_alias = UPROBED_FUNCTION;
+
+struct uprobe_context {
+	struct tracefs_instance *instance;
+	struct tracefs_dynevent *uprobe;
+};
+
+volatile bool got_sigtrap = false;
+
+/*
+ * Returns the file offset of a symbol in the executable of this program
+ * Returns 0 on failure
+ */
+uint64_t calc_sym_offset(const char *sym_name)
+{
+	GElf_Shdr section_header;
+	Elf_Scn *section = NULL;
+	Elf_Data *symtab_data;
+	uint64_t offset = 0;
+	char buf[PATH_MAX];
+	const char *name;
+	GElf_Sym symbol;
+	ssize_t n_bytes;
+	int n_entries, fd, i;
+	Elf *elf;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		pr_err("ELF version of libelf is lower than that of the program\n");
+		return 0;
+	}
+
+	n_bytes = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
+	if (n_bytes < 0) {
+		pr_perror("Failed to readlink /proc/self/exe");
+		return 0;
+	}
+	buf[n_bytes] = '\0'; /* readlink() does not terminate; one byte was reserved above */
+
+	fd = open(buf, O_RDONLY);
+	if (fd < 0) {
+		pr_perror("Failed to open self-executable");
+		return 0;
+	}
+
+	elf = elf_begin(fd, ELF_C_READ, NULL);
+	if (!elf) {
+		pr_err("%s\n", elf_errmsg(elf_errno()));
+		goto out_fd;
+	}
+
+	/* Look for the symbol table section and its header */
+	while ((section = elf_nextscn(elf, section)) != NULL) {
+		if (gelf_getshdr(section, &section_header) &&
+		    section_header.sh_type == SHT_SYMTAB)
+			break;
+	}
+	if (!section) {
+		pr_err("Failed to find symbol table\n");
+		goto out_elf;
+	}
+	symtab_data = elf_getdata(section, NULL);
+	n_entries = section_header.sh_entsize ? section_header.sh_size / section_header.sh_entsize : 0;
+
+	/* Look for a symbol with the required name */
+	for (i = 0; i < n_entries; i++) {
+		/* Symbol table's sh_link is the index of the string table section header */
+		name = gelf_getsym(symtab_data, i, &symbol) ?
+			elf_strptr(elf, section_header.sh_link, symbol.st_name) : NULL;
+		if (name && !strcmp(sym_name, name))
+			break;
+	}
+	if (i == n_entries) {
+		pr_err("Failed to find symbol \"%s\"\n", sym_name);
+		goto out_elf;
+	}
+
+	/* Get the section the symbol belongs to (mostly .text) */
+	if (gelf_getshdr(elf_getscn(elf, symbol.st_shndx), &section_header))
+		offset = symbol.st_value - section_header.sh_addr + section_header.sh_offset;
+	else
+		pr_err("Failed to get section of symbol \"%s\"\n", sym_name);
+
+out_elf:
+	elf_end(elf);
+out_fd:
+	close(fd);
+	return offset;
+}
+
+/*
+ * Set and enable a uprobe on the file at the given offset
+ * Returns struct uprobe_context with members set to NULL on failure
+ */
+struct uprobe_context enable_uprobe(const char *file, uint64_t offset)
+{
+	struct tracefs_instance *trace_instance;
+	struct tracefs_dynevent *uprobe;
+	struct uprobe_context context = {};
+
+	trace_instance = tracefs_instance_create("zdtm_uprobes_test");
+	if (!trace_instance) {
+		pr_perror("Failed to create tracefs instance");
+		return context;
+	}
+	tracefs_instance_reset(trace_instance);
+
+	uprobe = tracefs_uprobe_alloc(UPROBE_GROUP_NAME, UPROBE_EVENT_NAME, file, offset, NULL);
+	if (!uprobe) {
+		pr_perror("Failed to allocate uprobe");
+		goto instance_destroy;
+	}
+
+	if (tracefs_dynevent_create(uprobe)) {
+		pr_perror("Failed to create uprobe");
+		goto uprobe_free;
+	}
+
+	if (tracefs_event_enable(trace_instance, UPROBE_GROUP_NAME, UPROBE_EVENT_NAME)) {
+		pr_perror("Failed to enable uprobe");
+		goto uprobe_destroy;
+	}
+
+	context.instance = trace_instance;
+	context.uprobe   = uprobe;
+	return context;
+
+uprobe_destroy:
+	tracefs_dynevent_destroy(uprobe, false);
+uprobe_free:
+	tracefs_dynevent_free(uprobe);
+instance_destroy:
+	tracefs_instance_destroy(trace_instance);
+	tracefs_instance_free(trace_instance);
+	return context;
+}
+
+void destroy_uprobe(struct uprobe_context context)
+{
+	tracefs_dynevent_destroy(context.uprobe, true);
+	tracefs_dynevent_free(context.uprobe);
+	tracefs_instance_destroy(context.instance);
+	tracefs_instance_free(context.instance);
+}
+
+/*
+ * Check for the existence of the "[uprobes]" vma in /proc/self/maps
+ * Returns -1 on failure, 0 if not found, 1 if found
+ */
+int uprobes_vma_exists(void)
+{
+	FILE *f;
+	char buf[LINE_MAX];
+	int ret = 0;
+
+	f = fopen("/proc/self/maps", "r");
+	if (!f) {
+		pr_perror("Failed to open /proc/self/maps");
+		return -1;
+	}
+
+	while (fgets(buf, sizeof(buf), f)) {
+		if (strstr(buf, "[uprobes]")) {
+			ret = 1;
+			break;
+		}
+	}
+	if (ret == 0 && !feof(f)) {
+		pr_err("Failed to finish reading /proc/self/maps\n");
+		ret = -1;
+	}
+
+	fclose(f);
+	return ret;
+}
+
+/*
+ * SIGTRAP is sent if execution reaches a previously set uprobed location, and
+ * the corresponding uprobe is not active. We don't want this to happen on restore
+ */
+void sigtrap_handler(int signo, siginfo_t *info, void* context)
+{
+	if (info->si_code == SI_KERNEL) {
+		got_sigtrap = true;
+		fail("SIGTRAP on attempting to call uprobed function");
+	}
+}
+
+int main(int argc, char **argv)
+{
+	struct uprobe_context context;
+	struct sigaction sa;
+	char buf[PATH_MAX];
+	uint64_t offset;
+	ssize_t n_bytes;
+	int ret = 1;
+
+	test_init(argc, argv);
+
+	offset = calc_sym_offset(__stringify(UPROBED_FUNCTION));
+	if (!offset)
+		return 1;
+
+	n_bytes = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
+	if (n_bytes < 0) {
+		pr_perror("Failed to readlink /proc/self/exe");
+		return 1;
+	}
+	buf[n_bytes] = '\0';
+
+	sa.sa_flags = SA_SIGINFO;
+	sa.sa_sigaction = sigtrap_handler;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(SIGTRAP, &sa, NULL)) {
+		pr_perror("Failed to set SIGTRAP handler");
+		return 1;
+	}
+
+	context = enable_uprobe(buf, offset);
+	if (!context.instance)
+		return 1;
+
+	/*
+	 * Execution must reach the uprobed location at least once
+	 * for the kernel to create the uprobes vma
+	 */
+	uprobe_target_alias();
+
+	switch (uprobes_vma_exists()) {
+	case -1:
+		goto out_uprobe;
+		break;
+	case 0:
+		pr_err("uprobes vma does not exist\n");
+		goto out_uprobe;
+		break;
+	case 1:
+		test_msg("Found uprobes vma\n");
+		break;
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	/*
+	 * Calling the uprobed function after restore should not cause
+	 * a SIGTRAP, since the uprobe is still active
+	 */
+	uprobe_target_alias();
+	if (!got_sigtrap) {
+		pass();
+		ret = 0;
+	}
+
+out_uprobe:
+	destroy_uprobe(context);
+	return ret;
+}
diff --git a/test/zdtm/static/uprobes.desc b/test/zdtm/static/uprobes.desc
new file mode 100644
index 000000000..6eab1f498
--- /dev/null
+++ b/test/zdtm/static/uprobes.desc
@@ -0,0 +1,6 @@
+{
+	'feature': 'cgroupns',
+	'flags': 'suid nouser',
+	'flavor': 'h',
+	'opts': '--allow-uprobes'
+}
diff --git a/test/zdtm/static/vdso-proxy.c b/test/zdtm/static/vdso-proxy.c
index 43334974f..a53e6cdc0 100644
--- a/test/zdtm/static/vdso-proxy.c
+++ b/test/zdtm/static/vdso-proxy.c
@@ -70,6 +70,7 @@ static int parse_maps(struct vm_area *vmas)
 #endif
 		v->is_vvar_or_vdso |= strstr(buf, "[vdso]") != NULL;
 		v->is_vvar_or_vdso |= strstr(buf, "[vvar]") != NULL;
+		v->is_vvar_or_vdso |= strstr(buf, "[vvar_vclock]") != NULL;
 		test_msg("[NOTE]\tVMA: [%#" PRIx64 ", %#" PRIx64 "]\n", v->start, v->end);
 	}
 
@@ -86,42 +87,35 @@ static int parse_maps(struct vm_area *vmas)
 	return i;
 }
 
-int compare_vmas(struct vm_area *vmax, struct vm_area *vmay)
-{
-	if (vmax->start > vmay->start)
-		return 1;
-	if (vmax->start < vmay->start)
-		return -1;
-	if (vmax->end > vmay->end)
-		return 1;
-	if (vmax->end < vmay->end)
-		return -1;
-
-	return 0;
-}
-
-static int check_vvar_vdso(struct vm_area *before, struct vm_area *after)
+static int check_vvar_vdso(struct vm_area *before, int nr_before, struct vm_area *after, int nr_after)
 {
 	int i, j = 0;
 
-	for (i = 0; i < MAX_VMAS && j < MAX_VMAS; i++, j++) {
-		int cmp = compare_vmas(&before[i], &after[j]);
-
-		if (cmp == 0)
-			continue;
-
-		if (cmp < 0) { /* Lost mapping */
+	for (i = 0, j = 0; i < nr_before || j < nr_after;) {
+		if (j == nr_after || (i < nr_before && before[i].start < after[j].start)) {
 			test_msg("[NOTE]\tLost mapping: %#" PRIx64 "-%#" PRIx64 "\n", before[i].start, before[i].end);
-			j--;
 			if (before[i].is_vvar_or_vdso) {
 				fail("Lost vvar/vdso mapping");
 				return -1;
 			}
+			i++;
 			continue;
 		}
-
-		test_msg("[NOTE]\tNew mapping appeared: %#" PRIx64 "-%#" PRIx64 "\n", after[j].start, after[j].end);
-		i--;
+		if (i == nr_before || before[i].start > after[j].start) {
+			test_msg("[NOTE]\tNew mapping appeared: %#" PRIx64 "-%#" PRIx64 "\n", after[j].start, after[j].end);
+			j++;
+			continue;
+		}
+		if (before[i].end == after[j].end) {
+			i++;
+			j++;
+		} else if (before[i].end > after[j].end) {
+			before[i].start = after[j].end;
+			j++;
+		} else {
+			after[j].start = before[i].end;
+			i++;
+		}
 	}
 
 	return 0;
@@ -129,11 +123,10 @@ static int check_vvar_vdso(struct vm_area *before, struct vm_area *after)
 
 static struct vm_area vmas_before[MAX_VMAS];
 static struct vm_area vmas_after[MAX_VMAS];
+static int nr_before, nr_after;
 
 int main(int argc, char *argv[])
 {
-	int nr_before, nr_after;
-
 	test_init(argc, argv);
 
 	test_msg("[NOTE]\tMappings before:\n");
@@ -154,7 +147,7 @@ int main(int argc, char *argv[])
 	}
 
 	/* After restore vDSO/VVAR blobs must remain in the old place. */
-	if (check_vvar_vdso(vmas_before, vmas_after))
+	if (check_vvar_vdso(vmas_before, nr_before, vmas_after, nr_after))
 		return -1;
 
 	if (nr_before + 2 < nr_after) {
diff --git a/test/zdtm/static/vdso00.c b/test/zdtm/static/vdso00.c
index a9bef4dbd..69123a203 100644
--- a/test/zdtm/static/vdso00.c
+++ b/test/zdtm/static/vdso00.c
@@ -1,6 +1,6 @@
 #include <stdio.h>
 #include <stdlib.h>
-
+#include <inttypes.h>
 #include <unistd.h>
 
 #include <sys/time.h>
@@ -19,14 +19,14 @@ int main(int argc, char *argv[])
 	test_msg("%s pid %d\n", argv[0], getpid());
 
 	gettimeofday(&tv, &tz);
-	test_msg("%d time: %10li\n", getpid(), tv.tv_sec);
+	test_msg("%d time: %10" PRId64 "\n", getpid(), (int64_t)tv.tv_sec);
 
 	test_daemon();
 	test_waitsig();
 
 	/* this call will fail if vDSO is corrupted */
 	gettimeofday(&tv, &tz);
-	test_msg("%d time: %10li\n", getpid(), tv.tv_sec);
+	test_msg("%d time: %10" PRId64 "\n", getpid(), (int64_t)tv.tv_sec);
 
 	pass();
 
diff --git a/test/zdtm/static/vdso01.c b/test/zdtm/static/vdso01.c
index d8d64155a..d8b3c94d5 100644
--- a/test/zdtm/static/vdso01.c
+++ b/test/zdtm/static/vdso01.c
@@ -1,5 +1,6 @@
 #include <stdlib.h>
 #include <stdio.h>
+#include <inttypes.h>
 #include <stdbool.h>
 #include <unistd.h>
 #include <string.h>
@@ -324,7 +325,8 @@ static int vdso_clock_gettime_handler(void *func)
 	clock_gettime(CLOCK_REALTIME, &ts1);
 	vdso_clock_gettime(CLOCK_REALTIME, &ts2);
 
-	test_msg("clock_gettime: tv_sec %li vdso_clock_gettime: tv_sec %li\n", ts1.tv_sec, ts2.tv_sec);
+	test_msg("clock_gettime: tv_sec %" PRId64 " vdso_clock_gettime: tv_sec %" PRId64 "\n",
+		 (int64_t)ts1.tv_sec, (int64_t)ts2.tv_sec);
 
 	if (labs(ts1.tv_sec - ts2.tv_sec) > TIME_DELTA_SEC) {
 		pr_perror("Delta is too big");
@@ -354,7 +356,8 @@ static int vdso_gettimeofday_handler(void *func)
 	gettimeofday(&tv1, &tz);
 	vdso_gettimeofday(&tv2, &tz);
 
-	test_msg("gettimeofday: tv_sec %li vdso_gettimeofday: tv_sec %li\n", tv1.tv_sec, tv2.tv_sec);
+	test_msg("gettimeofday: tv_sec %" PRId64 " vdso_gettimeofday: tv_sec %" PRId64 "\n",
+		 (int64_t)tv1.tv_sec, (int64_t)tv2.tv_sec);
 
 	if (labs(tv1.tv_sec - tv2.tv_sec) > TIME_DELTA_SEC) {
 		pr_perror("Delta is too big");
@@ -372,7 +375,7 @@ static int vdso_time_handler(void *func)
 	t1 = time(NULL);
 	t2 = vdso_time(NULL);
 
-	test_msg("time: %li vdso_time: %li\n", (long)t1, (long)t1);
+	test_msg("time: %li vdso_time: %li\n", (long)t1, (long)t2);
 
 	if (labs(t1 - t2) > TIME_DELTA_SEC) {
 		pr_perror("Delta is too big");
diff --git a/test/zdtm/static/vdso02.c b/test/zdtm/static/vdso02.c
index 2050bca71..5779b7fd6 100644
--- a/test/zdtm/static/vdso02.c
+++ b/test/zdtm/static/vdso02.c
@@ -29,7 +29,8 @@ static int parse_vm_area(char *buf, struct vm_area *vma)
 	return -1;
 }
 
-static int find_blobs(pid_t pid, struct vm_area *vdso, struct vm_area *vvar)
+static int find_blobs(pid_t pid, struct vm_area *vdso,
+		      struct vm_area *vvar, struct vm_area *vvar_vclock)
 {
 	char buf[BUF_SZ];
 	int ret = -1;
@@ -39,6 +40,8 @@ static int find_blobs(pid_t pid, struct vm_area *vdso, struct vm_area *vvar)
 	vdso->end = VDSO_BAD_ADDR;
 	vvar->start = VVAR_BAD_ADDR;
 	vvar->end = VVAR_BAD_ADDR;
+	vvar_vclock->start = VVAR_BAD_ADDR;
+	vvar_vclock->end = VVAR_BAD_ADDR;
 
 	if (snprintf(buf, BUF_SZ, "/proc/%d/maps", pid) < 0) {
 		pr_perror("snprintf() failure for path");
@@ -57,12 +60,18 @@ static int find_blobs(pid_t pid, struct vm_area *vdso, struct vm_area *vvar)
 
 		if (strstr(buf, "[vvar]") && parse_vm_area(buf, vvar))
 			goto err;
+		if (strstr(buf, "[vvar_vclock]") &&
+		    parse_vm_area(buf, vvar_vclock))
+			goto err;
 	}
 
 	if (vdso->start != VDSO_BAD_ADDR)
 		test_msg("[vdso] %lx-%lx\n", vdso->start, vdso->end);
 	if (vvar->start != VVAR_BAD_ADDR)
 		test_msg("[vvar] %lx-%lx\n", vvar->start, vvar->end);
+	if (vvar_vclock->start != VVAR_BAD_ADDR)
+		test_msg("[vvar_vclock] %lx-%lx\n",
+			 vvar_vclock->start, vvar_vclock->end);
 	ret = 0;
 err:
 	fclose(maps);
@@ -143,10 +152,10 @@ void sys_exit(int status)
 
 static int unmap_blobs(void)
 {
-	struct vm_area vdso, vvar;
+	struct vm_area vdso, vvar, vvar_vclock;
 	int ret;
 
-	if (find_blobs(getpid(), &vdso, &vvar))
+	if (find_blobs(getpid(), &vdso, &vvar, &vvar_vclock))
 		return -1;
 
 	if (vdso.start != VDSO_BAD_ADDR) {
@@ -159,13 +168,19 @@ static int unmap_blobs(void)
 		if (ret)
 			return ret;
 	}
+	if (vvar_vclock.start != VVAR_BAD_ADDR) {
+		ret = sys_munmap((void *)vvar_vclock.start,
+				 vvar_vclock.end - vvar_vclock.start);
+		if (ret)
+			return ret;
+	}
 
 	return 0;
 }
 
 int main(int argc, char *argv[])
 {
-	struct vm_area vdso, vvar;
+	struct vm_area vdso, vvar, vvar_vclock;
 	pid_t child;
 	int status, ret = -1;
 
@@ -201,9 +216,11 @@ int main(int argc, char *argv[])
 		goto out_kill;
 	}
 
-	if (find_blobs(child, &vdso, &vvar))
+	if (find_blobs(child, &vdso, &vvar, &vvar_vclock))
 		goto out_kill;
-	if (vdso.start != VDSO_BAD_ADDR || vvar.start != VVAR_BAD_ADDR) {
+	if (vdso.start != VDSO_BAD_ADDR ||
+	    vvar.start != VVAR_BAD_ADDR ||
+	    vvar_vclock.start != VVAR_BAD_ADDR) {
 		pr_err("Found vvar or vdso blob(s) in child, which should have unmapped them\n");
 		goto out_kill;
 	}
@@ -211,7 +228,7 @@ int main(int argc, char *argv[])
 	test_daemon();
 	test_waitsig();
 
-	if (find_blobs(child, &vdso, &vvar))
+	if (find_blobs(child, &vdso, &vvar, &vvar_vclock))
 		goto out_kill;
-	if (vdso.start != VDSO_BAD_ADDR || vvar.start != VVAR_BAD_ADDR) {
+	if (vdso.start != VDSO_BAD_ADDR || vvar.start != VVAR_BAD_ADDR || vvar_vclock.start != VVAR_BAD_ADDR) {
 		pr_err("Child without vdso got it after C/R\n");
diff --git a/test/zdtm/transition/Makefile b/test/zdtm/transition/Makefile
index 98440f4e2..ddf2faaad 100644
--- a/test/zdtm/transition/Makefile
+++ b/test/zdtm/transition/Makefile
@@ -25,6 +25,7 @@ TST_NOFILE	=	\
 		pidfd_store_sk \
 		rseq01		\
 		rseq02		\
+		stack		\
 
 
 TST_FILE	=	\
@@ -83,7 +84,9 @@ ptrace:		LDFLAGS += -pthread
 fork2:		CFLAGS += -D FORK2
 thread-bomb.o:	CFLAGS += -pthread
 thread-bomb:	LDFLAGS += -pthread
+rseq01:		LDLIBS += -pthread
 rseq02:		CFLAGS += -D NORESTART
+rseq02:		LDLIBS += -pthread
 
 %: %.sh
 	cp $< $@
diff --git a/test/zdtm/transition/epoll.c b/test/zdtm/transition/epoll.c
index fdd492ab2..803e50541 100644
--- a/test/zdtm/transition/epoll.c
+++ b/test/zdtm/transition/epoll.c
@@ -158,9 +158,11 @@ int main(int argc, char **argv)
 			exit(1);
 		}
 		for (i = 0; i < rv; i++) {
-			while (read(events[i].data.fd, buf, buf_size) > 0)
+			int ret;
+
+			while ((ret = read(events[i].data.fd, buf, buf_size)) > 0)
 				;
-			if (errno != EAGAIN && errno != 0 && errno) {
+			if (ret < 0 && errno != EAGAIN) {
 				pr_perror("read error");
 				killall();
 				exit(1);
diff --git a/test/zdtm/transition/ipc.c b/test/zdtm/transition/ipc.c
index 0f16dbc68..7660f70af 100644
--- a/test/zdtm/transition/ipc.c
+++ b/test/zdtm/transition/ipc.c
@@ -178,7 +178,7 @@ int main(int argc, char **argv)
 		pr_perror("Child 2 was killed");
 	} else if (WEXITSTATUS(ret)) {
 		fail_count++;
-		pr_perror("Child 2 couldn't inititalise");
+		pr_perror("Child 2 couldn't initialise");
 	}
 out_child:
 	kill(pid1, SIGTERM);
@@ -188,7 +188,7 @@ out_child:
 		pr_perror("Child 1 was killed");
 	} else if (WEXITSTATUS(ret)) {
 		fail_count++;
-		pr_perror("Child 1 couldn't inititalise");
+		pr_perror("Child 1 couldn't initialise");
 	}
 out_shdt:
 	shmdt(mem);
diff --git a/test/zdtm/transition/lazy-thp.c b/test/zdtm/transition/lazy-thp.c
index 2bf99dc4c..2e9722b96 100644
--- a/test/zdtm/transition/lazy-thp.c
+++ b/test/zdtm/transition/lazy-thp.c
@@ -25,7 +25,7 @@ int main(int argc, char **argv)
 
 	test_init(argc, argv);
 
-	/* we presume that malloc returns not page aliged address */
+	/* we presume that malloc returns not page aligned address */
 	mem = malloc(PAGE_SIZE * N_PAGES);
 	org = malloc(PAGE_SIZE);
 	if (!mem || !org) {
diff --git a/test/zdtm/transition/maps007.c b/test/zdtm/transition/maps007.c
index 8a605cfe0..35c196bc4 100644
--- a/test/zdtm/transition/maps007.c
+++ b/test/zdtm/transition/maps007.c
@@ -38,7 +38,7 @@ int main(int argc, char **argv)
 	struct {
 		futex_t delta;
 		futex_t stop;
-	} * shm;
+	} *shm;
 	uint32_t v;
 	unsigned long long count = 0;
 	int i;
diff --git a/test/zdtm/transition/rseq01.c b/test/zdtm/transition/rseq01.c
index b6d470785..08a7a8e1a 100644
--- a/test/zdtm/transition/rseq01.c
+++ b/test/zdtm/transition/rseq01.c
@@ -33,7 +33,10 @@ static inline void *thread_pointer(void)
 static inline void unregister_old_rseq(void)
 {
 	/* unregister rseq */
-	syscall(__NR_rseq, (void *)((char *)thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG);
+	unsigned int size = __rseq_size;
+	if (__rseq_size < 32)
+		size = 32;
+	syscall(__NR_rseq, (void *)((char *)thread_pointer() + __rseq_offset), size, 1, RSEQ_SIG);
 }
 #else
 static inline void unregister_old_rseq(void)
@@ -86,7 +89,7 @@ struct rseq {
 #endif
 /* EOF */
 
-static volatile struct rseq *rseq_ptr;
+static __thread volatile struct rseq *rseq_ptr;
 static __thread volatile struct rseq __rseq_abi;
 
 static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig)
@@ -119,7 +122,7 @@ static void check_thread(void)
 
 #define rseq_after_asm_goto() asm volatile("" : : : "memory")
 
-static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+static int rseq_addv(intptr_t *v, intptr_t count, int cpu, bool ignore_abort, const char *id)
 {
 	double a = 10000000000000000.0;
 	double b = -1;
@@ -177,7 +180,7 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 	);
 	/* clang-format on */
 	rseq_after_asm_goto();
-	test_msg("exit %lx %lx %f %f\n", rseq_cs1, rseq_cs2, a, b);
+	test_msg("exit %s, %lx %lx %f %f\n", id, rseq_cs1, rseq_cs2, a, b);
 	if (rseq_cs1 != rseq_cs2) {
 		/*
 		 * It means that we finished critical section
@@ -192,16 +195,45 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 	return 0;
 abort:
 	rseq_after_asm_goto();
-	test_msg("abort %lx %lx %f %f\n", rseq_cs1, rseq_cs2, a, b);
+	test_msg("abort %s, %lx %lx %f %f\n", id, rseq_cs1, rseq_cs2, a, b);
+	if (ignore_abort)
+		return 0;
 	return -1;
 }
 
+static task_waiter_t waiter;
+static intptr_t *cpu_data;
+bool ignore_abort = true;
+int thread_ret;
+
+void *thread_routine(void *args)
+{
+	/* Point the thread-local rseq_ptr at this thread's own __rseq_abi. */
+	rseq_ptr = &__rseq_abi;
+	memset((void *)rseq_ptr, 0, sizeof(struct rseq));
+	register_thread();
+	/* main() waits for stage 1 before test_daemon() and completes stage 2 after it. */
+	task_waiter_complete(&waiter, 1);
+	task_waiter_wait4(&waiter, 2);
+
+	while (test_go()) {
+		int cpu = RSEQ_ACCESS_ONCE(rseq_ptr->cpu_id_start);
+
+		thread_ret = rseq_addv(cpu_data + cpu, 2, cpu, ignore_abort, "thread");
+		if (thread_ret)
+			break;
+	}
+
+	check_thread();
+	return NULL;
+}
+
 int main(int argc, char *argv[])
 {
 	int cpu = 0;
 	int ret;
-	intptr_t *cpu_data;
 	long nr_cpus;
+	pthread_t thread;
 
 	rseq_ptr = &__rseq_abi;
 	memset((void *)rseq_ptr, 0, sizeof(struct rseq));
@@ -225,31 +257,37 @@ int main(int argc, char *argv[])
 	 * https://github.com/torvalds/linux/blob/ce522ba9/kernel/rseq.c#L192
 	 */
 #ifdef NORESTART
+	ignore_abort = false;
 	rseq_ptr->flags = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL |
 			  RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE;
 #endif
 
+	task_waiter_init(&waiter);
+	if (pthread_create(&thread, NULL, thread_routine, NULL)) {
+		fail("pthread_create");
+		exit(EXIT_FAILURE);
+	}
+	task_waiter_wait4(&waiter, 1);
+
 	test_daemon();
+	task_waiter_complete(&waiter, 2);
 
 	while (test_go()) {
 		cpu = RSEQ_ACCESS_ONCE(rseq_ptr->cpu_id_start);
-		ret = rseq_addv(&cpu_data[cpu], 2, cpu);
-
-/* NORESTART is NOT set */
-#ifndef NORESTART
-		/* just ignore abort */
-		ret = 0;
-#endif
+		ret = rseq_addv(&cpu_data[cpu], 2, cpu, ignore_abort, "task");
 
 		if (ret)
 			break;
 	}
 
-	test_waitsig();
-
 	check_thread();
 
-	if (ret)
+	if (pthread_join(thread, NULL)) {
+		fail("pthread_join");
+		exit(EXIT_FAILURE);
+	}
+
+	if (ret || thread_ret)
 		fail();
 	else
 		pass();
diff --git a/test/zdtm/transition/stack.c b/test/zdtm/transition/stack.c
new file mode 100644
index 000000000..9548b9182
--- /dev/null
+++ b/test/zdtm/transition/stack.c
@@ -0,0 +1,16 @@
+#include "zdtmtst.h"
+
+const char *test_doc = "Tests that parasite code does not write past the start of the stack";
+const char *test_author = "Younes Manton <ymanton@ca.ibm.com>";
+
+int main(int argc, char **argv)
+{
+	test_init(argc, argv);
+	/* Deliberately no workload of its own: daemonize and wait for the signal. */
+	test_daemon();
+	test_waitsig();
+	/* NOTE(review): presumably a clobbered stack would crash before this point - confirm. */
+	pass();
+
+	return 0;
+}
diff --git a/test/zdtm_ct.c b/test/zdtm_ct.c
index 0e8eeff8a..44316893d 100644
--- a/test/zdtm_ct.c
+++ b/test/zdtm_ct.c
@@ -93,44 +93,50 @@ static int create_timens(void)
 
 int main(int argc, char **argv)
 {
+	uid_t uid;
 	pid_t pid;
 	int status;
 
+	uid = getuid();
+
 	/*
 	 * pidns is used to avoid conflicts
 	 * mntns is used to mount /proc
-	 * net is used to avoid conflicts of parasite sockets
+	 * net is used to avoid conflicts between network tests
 	 */
-	if (unshare(CLONE_NEWNS | CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC))
-		return 1;
+	if (!uid)
+		if (unshare(CLONE_NEWNS | CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC))
+			return 1;
 	pid = fork();
 	if (pid == 0) {
-		if (create_timens())
-			exit(1);
-		if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL)) {
-			fprintf(stderr, "mount(/, S_REC | MS_SLAVE)): %m");
-			return 1;
+		if (!uid) {
+			if (create_timens())
+				exit(1);
+			if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL)) {
+				fprintf(stderr, "mount(/, S_REC | MS_SLAVE)): %m");
+				return 1;
+			}
+			umount2("/proc", MNT_DETACH);
+			umount2("/dev/pts", MNT_DETACH);
+			if (mount("zdtm_proc", "/proc", "proc", 0, NULL)) {
+				fprintf(stderr, "mount(/proc): %m");
+				return 1;
+			}
+			if (mount("zdtm_devpts", "/dev/pts", "devpts", 0, "newinstance,ptmxmode=0666")) {
+				fprintf(stderr, "mount(pts): %m");
+				return 1;
+			}
+			if (mount("zdtm_binfmt", "/proc/sys/fs/binfmt_misc", "binfmt_misc", 0, NULL)) {
+				fprintf(stderr, "mount(binfmt_misc): %m");
+				return 1;
+			}
+			if (mount("/dev/pts/ptmx", "/dev/ptmx", NULL, MS_BIND, NULL)) {
+				fprintf(stderr, "mount(ptmx): %m");
+				return 1;
+			}
+			if (system("ip link set up dev lo"))
+				return 1;
 		}
-		umount2("/proc", MNT_DETACH);
-		umount2("/dev/pts", MNT_DETACH);
-		if (mount("zdtm_proc", "/proc", "proc", 0, NULL)) {
-			fprintf(stderr, "mount(/proc): %m");
-			return 1;
-		}
-		if (mount("zdtm_devpts", "/dev/pts", "devpts", 0, "newinstance,ptmxmode=0666")) {
-			fprintf(stderr, "mount(pts): %m");
-			return 1;
-		}
-		if (mount("zdtm_binfmt", "/proc/sys/fs/binfmt_misc", "binfmt_misc", 0, NULL)) {
-			fprintf(stderr, "mount(binfmt_misc): %m");
-			return 1;
-		}
-		if (mount("/dev/pts/ptmx", "/dev/ptmx", NULL, MS_BIND, NULL)) {
-			fprintf(stderr, "mount(ptmx): %m");
-			return 1;
-		}
-		if (system("ip link set up dev lo"))
-			return 1;
 		execv(argv[1], argv + 1);
 		fprintf(stderr, "execve: %m");
 		return 1;