diff --git a/.circleci/config.yml b/.circleci/config.yml
new file mode 100644
index 000000000..785b383e1
--- /dev/null
+++ b/.circleci/config.yml
@@ -0,0 +1,27 @@
+version: 2.1
+jobs:
+  test-local-gcc:
+    machine:
+      image: default
+    working_directory: ~/criu
+    steps:
+      - checkout
+      - run:
+          name: 'Test local with GCC'
+          command: sudo -E make -C scripts/ci local
+  test-local-clang:
+    machine:
+      image: default
+    working_directory: ~/criu
+    steps:
+      - checkout
+      - run:
+          name: 'Test local with CLANG'
+          command: sudo -E make -C scripts/ci local CLANG=1
+
+workflows:
+  version: 2
+  builds:
+    jobs:
+      - test-local-gcc
+      - test-local-clang
diff --git a/.cirrus.yml b/.cirrus.yml
new file mode 100644
index 000000000..72dbb3898
--- /dev/null
+++ b/.cirrus.yml
@@ -0,0 +1,101 @@
+task:
+ name: Vagrant Fedora based test (no VDSO)
+ environment:
+ HOME: "/root"
+ CIRRUS_WORKING_DIR: "/tmp/criu"
+
+ compute_engine_instance:
+ image_project: cirrus-images
+ image: family/docker-kvm
+ platform: linux
+ cpu: 4
+ memory: 16G
+ nested_virtualization: true
+
+ setup_script: |
+ contrib/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
+ sudo kvm-ok
+ build_script: |
+ make -C scripts/ci vagrant-fedora-no-vdso
+
+task:
+ name: CentOS Stream 9 based test
+ environment:
+ HOME: "/root"
+ CIRRUS_WORKING_DIR: "/tmp/criu"
+
+ compute_engine_instance:
+ image_project: centos-cloud
+ image: family/centos-stream-9
+ platform: linux
+ cpu: 4
+ memory: 8G
+
+ setup_script: |
+ dnf config-manager --set-enabled crb # Same as CentOS 8 powertools
+ dnf -y install epel-release epel-next-release
+ contrib/dependencies/dnf-packages.sh
+ # The image ships a version of nettle that is too old to work with gnutls.
+ # Just upgrade to the latest to make the error go away.
+ dnf -y upgrade nettle nettle-devel
+ systemctl stop sssd
+ # Even with selinux in permissive mode the selinux tests will be executed.
+ # The Cirrus CI user runs as a service from selinux's point of view and is
+ # much more restricted than a normal shell (system_u:system_r:unconfined_service_t:s0).
+ # The test case above (vagrant-fedora-no-vdso) should run selinux tests in enforcing mode.
+ setenforce 0
+
+ build_script: |
+ make -C scripts/ci local SKIP_CI_PREP=1 CC=gcc CD_TO_TOP=1 ZDTM_OPTS="-x zdtm/static/socket-raw"
+
+task:
+ name: Vagrant Fedora Rawhide based test
+ environment:
+ HOME: "/root"
+ CIRRUS_WORKING_DIR: "/tmp/criu"
+
+ compute_engine_instance:
+ image_project: cirrus-images
+ image: family/docker-kvm
+ platform: linux
+ cpu: 4
+ memory: 16G
+ nested_virtualization: true
+
+ setup_script: |
+ contrib/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
+ sudo kvm-ok
+ build_script: |
+ make -C scripts/ci vagrant-fedora-rawhide
+
+task:
+ name: Vagrant Fedora based test (non-root)
+ environment:
+ HOME: "/root"
+ CIRRUS_WORKING_DIR: "/tmp/criu"
+
+ compute_engine_instance:
+ image_project: cirrus-images
+ image: family/docker-kvm
+ platform: linux
+ cpu: 4
+ memory: 16G
+ nested_virtualization: true
+
+ setup_script: |
+ contrib/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
+ sudo kvm-ok
+ build_script: |
+ make -C scripts/ci vagrant-fedora-non-root
+
+task:
+ name: aarch64 Fedora Rawhide
+ arm_container:
+ image: registry.fedoraproject.org/fedora:rawhide
+ cpu: 4
+ memory: 4G
+ script: uname -a
+ build_script: |
+ scripts/ci/prepare-for-fedora-rawhide.sh
+ make -C scripts/ci/ local CC=gcc SKIP_CI_PREP=1 SKIP_CI_TEST=1 CD_TO_TOP=1
+ make -C test/zdtm -j 4
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 000000000..fb40bc613
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,565 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# clang-format configuration file. Intended for clang-format >= 11.
+#
+# For more information, see:
+#
+# Documentation/process/clang-format.rst
+# https://clang.llvm.org/docs/ClangFormat.html
+# https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+#
+---
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: Left # Unknown to clang-format-4.0
+AlignOperands: true
+AlignTrailingComments: true
+AlignConsecutiveMacros: true
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: false
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+ AfterClass: false
+ AfterControlStatement: false
+ AfterEnum: false
+ AfterFunction: true
+ AfterNamespace: true
+ AfterObjCDeclaration: false
+ AfterStruct: false
+ AfterUnion: false
+ AfterExternBlock: false # Unknown to clang-format-5.0
+ BeforeCatch: false
+ BeforeElse: false
+ IndentBraces: false
+ SplitEmptyFunction: true # Unknown to clang-format-4.0
+ SplitEmptyRecord: true # Unknown to clang-format-4.0
+ SplitEmptyNamespace: true # Unknown to clang-format-4.0
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Custom
+BreakBeforeInheritanceComma: false # Unknown to clang-format-4.0
+BreakBeforeTernaryOperators: false
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeComma # Unknown to clang-format-4.0
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: false
+ColumnLimit: 0
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: false # Unknown to clang-format-4.0
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 8
+ContinuationIndentWidth: 8
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: false # Unknown to clang-format-4.0
+
+# Taken from:
+# git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ \
+# | sed "s,^#define \([^[:space:]]*for_each[^[:space:]]*\)(.*$, - '\1'," \
+# | sort | uniq
+ForEachMacros:
+ - 'for_each_pstree_item'
+ - 'for_each_bit'
+ - 'apei_estatus_for_each_section'
+ - 'ata_for_each_dev'
+ - 'ata_for_each_link'
+ - '__ata_qc_for_each'
+ - 'ata_qc_for_each'
+ - 'ata_qc_for_each_raw'
+ - 'ata_qc_for_each_with_internal'
+ - 'ax25_for_each'
+ - 'ax25_uid_for_each'
+ - '__bio_for_each_bvec'
+ - 'bio_for_each_bvec'
+ - 'bio_for_each_bvec_all'
+ - 'bio_for_each_integrity_vec'
+ - '__bio_for_each_segment'
+ - 'bio_for_each_segment'
+ - 'bio_for_each_segment_all'
+ - 'bio_list_for_each'
+ - 'bip_for_each_vec'
+ - 'bitmap_for_each_clear_region'
+ - 'bitmap_for_each_set_region'
+ - 'blkg_for_each_descendant_post'
+ - 'blkg_for_each_descendant_pre'
+ - 'blk_queue_for_each_rl'
+ - 'bond_for_each_slave'
+ - 'bond_for_each_slave_rcu'
+ - 'bpf_for_each_spilled_reg'
+ - 'btree_for_each_safe128'
+ - 'btree_for_each_safe32'
+ - 'btree_for_each_safe64'
+ - 'btree_for_each_safel'
+ - 'card_for_each_dev'
+ - 'cgroup_taskset_for_each'
+ - 'cgroup_taskset_for_each_leader'
+ - 'cpufreq_for_each_entry'
+ - 'cpufreq_for_each_entry_idx'
+ - 'cpufreq_for_each_valid_entry'
+ - 'cpufreq_for_each_valid_entry_idx'
+ - 'css_for_each_child'
+ - 'css_for_each_descendant_post'
+ - 'css_for_each_descendant_pre'
+ - 'device_for_each_child_node'
+ - 'displayid_iter_for_each'
+ - 'dma_fence_chain_for_each'
+ - 'do_for_each_ftrace_op'
+ - 'drm_atomic_crtc_for_each_plane'
+ - 'drm_atomic_crtc_state_for_each_plane'
+ - 'drm_atomic_crtc_state_for_each_plane_state'
+ - 'drm_atomic_for_each_plane_damage'
+ - 'drm_client_for_each_connector_iter'
+ - 'drm_client_for_each_modeset'
+ - 'drm_connector_for_each_possible_encoder'
+ - 'drm_for_each_bridge_in_chain'
+ - 'drm_for_each_connector_iter'
+ - 'drm_for_each_crtc'
+ - 'drm_for_each_crtc_reverse'
+ - 'drm_for_each_encoder'
+ - 'drm_for_each_encoder_mask'
+ - 'drm_for_each_fb'
+ - 'drm_for_each_legacy_plane'
+ - 'drm_for_each_plane'
+ - 'drm_for_each_plane_mask'
+ - 'drm_for_each_privobj'
+ - 'drm_mm_for_each_hole'
+ - 'drm_mm_for_each_node'
+ - 'drm_mm_for_each_node_in_range'
+ - 'drm_mm_for_each_node_safe'
+ - 'flow_action_for_each'
+ - 'for_each_acpi_dev_match'
+ - 'for_each_active_dev_scope'
+ - 'for_each_active_drhd_unit'
+ - 'for_each_active_iommu'
+ - 'for_each_aggr_pgid'
+ - 'for_each_available_child_of_node'
+ - 'for_each_bio'
+ - 'for_each_board_func_rsrc'
+ - 'for_each_bvec'
+ - 'for_each_card_auxs'
+ - 'for_each_card_auxs_safe'
+ - 'for_each_card_components'
+ - 'for_each_card_dapms'
+ - 'for_each_card_pre_auxs'
+ - 'for_each_card_prelinks'
+ - 'for_each_card_rtds'
+ - 'for_each_card_rtds_safe'
+ - 'for_each_card_widgets'
+ - 'for_each_card_widgets_safe'
+ - 'for_each_cgroup_storage_type'
+ - 'for_each_child_of_node'
+ - 'for_each_clear_bit'
+ - 'for_each_clear_bit_from'
+ - 'for_each_cmsghdr'
+ - 'for_each_compatible_node'
+ - 'for_each_component_dais'
+ - 'for_each_component_dais_safe'
+ - 'for_each_comp_order'
+ - 'for_each_console'
+ - 'for_each_cpu'
+ - 'for_each_cpu_and'
+ - 'for_each_cpu_not'
+ - 'for_each_cpu_wrap'
+ - 'for_each_dapm_widgets'
+ - 'for_each_dev_addr'
+ - 'for_each_dev_scope'
+ - 'for_each_dma_cap_mask'
+ - 'for_each_dpcm_be'
+ - 'for_each_dpcm_be_rollback'
+ - 'for_each_dpcm_be_safe'
+ - 'for_each_dpcm_fe'
+ - 'for_each_drhd_unit'
+ - 'for_each_dss_dev'
+ - 'for_each_dtpm_table'
+ - 'for_each_efi_memory_desc'
+ - 'for_each_efi_memory_desc_in_map'
+ - 'for_each_element'
+ - 'for_each_element_extid'
+ - 'for_each_element_id'
+ - 'for_each_endpoint_of_node'
+ - 'for_each_evictable_lru'
+ - 'for_each_fib6_node_rt_rcu'
+ - 'for_each_fib6_walker_rt'
+ - 'for_each_free_mem_pfn_range_in_zone'
+ - 'for_each_free_mem_pfn_range_in_zone_from'
+ - 'for_each_free_mem_range'
+ - 'for_each_free_mem_range_reverse'
+ - 'for_each_func_rsrc'
+ - 'for_each_hstate'
+ - 'for_each_if'
+ - 'for_each_iommu'
+ - 'for_each_ip_tunnel_rcu'
+ - 'for_each_irq_nr'
+ - 'for_each_link_codecs'
+ - 'for_each_link_cpus'
+ - 'for_each_link_platforms'
+ - 'for_each_lru'
+ - 'for_each_matching_node'
+ - 'for_each_matching_node_and_match'
+ - 'for_each_member'
+ - 'for_each_memcg_cache_index'
+ - 'for_each_mem_pfn_range'
+ - '__for_each_mem_range'
+ - 'for_each_mem_range'
+ - '__for_each_mem_range_rev'
+ - 'for_each_mem_range_rev'
+ - 'for_each_mem_region'
+ - 'for_each_migratetype_order'
+ - 'for_each_msi_entry'
+ - 'for_each_msi_entry_safe'
+ - 'for_each_msi_vector'
+ - 'for_each_net'
+ - 'for_each_net_continue_reverse'
+ - 'for_each_netdev'
+ - 'for_each_netdev_continue'
+ - 'for_each_netdev_continue_rcu'
+ - 'for_each_netdev_continue_reverse'
+ - 'for_each_netdev_feature'
+ - 'for_each_netdev_in_bond_rcu'
+ - 'for_each_netdev_rcu'
+ - 'for_each_netdev_reverse'
+ - 'for_each_netdev_safe'
+ - 'for_each_net_rcu'
+ - 'for_each_new_connector_in_state'
+ - 'for_each_new_crtc_in_state'
+ - 'for_each_new_mst_mgr_in_state'
+ - 'for_each_new_plane_in_state'
+ - 'for_each_new_private_obj_in_state'
+ - 'for_each_node'
+ - 'for_each_node_by_name'
+ - 'for_each_node_by_type'
+ - 'for_each_node_mask'
+ - 'for_each_node_state'
+ - 'for_each_node_with_cpus'
+ - 'for_each_node_with_property'
+ - 'for_each_nonreserved_multicast_dest_pgid'
+ - 'for_each_of_allnodes'
+ - 'for_each_of_allnodes_from'
+ - 'for_each_of_cpu_node'
+ - 'for_each_of_pci_range'
+ - 'for_each_old_connector_in_state'
+ - 'for_each_old_crtc_in_state'
+ - 'for_each_old_mst_mgr_in_state'
+ - 'for_each_oldnew_connector_in_state'
+ - 'for_each_oldnew_crtc_in_state'
+ - 'for_each_oldnew_mst_mgr_in_state'
+ - 'for_each_oldnew_plane_in_state'
+ - 'for_each_oldnew_plane_in_state_reverse'
+ - 'for_each_oldnew_private_obj_in_state'
+ - 'for_each_old_plane_in_state'
+ - 'for_each_old_private_obj_in_state'
+ - 'for_each_online_cpu'
+ - 'for_each_online_node'
+ - 'for_each_online_pgdat'
+ - 'for_each_pci_bridge'
+ - 'for_each_pci_dev'
+ - 'for_each_pci_msi_entry'
+ - 'for_each_pcm_streams'
+ - 'for_each_physmem_range'
+ - 'for_each_populated_zone'
+ - 'for_each_possible_cpu'
+ - 'for_each_present_cpu'
+ - 'for_each_prime_number'
+ - 'for_each_prime_number_from'
+ - 'for_each_process'
+ - 'for_each_process_thread'
+ - 'for_each_prop_codec_conf'
+ - 'for_each_prop_dai_codec'
+ - 'for_each_prop_dai_cpu'
+ - 'for_each_prop_dlc_codecs'
+ - 'for_each_prop_dlc_cpus'
+ - 'for_each_prop_dlc_platforms'
+ - 'for_each_property_of_node'
+ - 'for_each_registered_fb'
+ - 'for_each_requested_gpio'
+ - 'for_each_requested_gpio_in_range'
+ - 'for_each_reserved_mem_range'
+ - 'for_each_reserved_mem_region'
+ - 'for_each_rtd_codec_dais'
+ - 'for_each_rtd_components'
+ - 'for_each_rtd_cpu_dais'
+ - 'for_each_rtd_dais'
+ - 'for_each_set_bit'
+ - 'for_each_set_bit_from'
+ - 'for_each_set_clump8'
+ - 'for_each_sg'
+ - 'for_each_sg_dma_page'
+ - 'for_each_sg_page'
+ - 'for_each_sgtable_dma_page'
+ - 'for_each_sgtable_dma_sg'
+ - 'for_each_sgtable_page'
+ - 'for_each_sgtable_sg'
+ - 'for_each_sibling_event'
+ - 'for_each_subelement'
+ - 'for_each_subelement_extid'
+ - 'for_each_subelement_id'
+ - '__for_each_thread'
+ - 'for_each_thread'
+ - 'for_each_unicast_dest_pgid'
+ - 'for_each_vsi'
+ - 'for_each_wakeup_source'
+ - 'for_each_zone'
+ - 'for_each_zone_zonelist'
+ - 'for_each_zone_zonelist_nodemask'
+ - 'fwnode_for_each_available_child_node'
+ - 'fwnode_for_each_child_node'
+ - 'fwnode_graph_for_each_endpoint'
+ - 'gadget_for_each_ep'
+ - 'genradix_for_each'
+ - 'genradix_for_each_from'
+ - 'hash_for_each'
+ - 'hash_for_each_possible'
+ - 'hash_for_each_possible_rcu'
+ - 'hash_for_each_possible_rcu_notrace'
+ - 'hash_for_each_possible_safe'
+ - 'hash_for_each_rcu'
+ - 'hash_for_each_safe'
+ - 'hctx_for_each_ctx'
+ - 'hlist_bl_for_each_entry'
+ - 'hlist_bl_for_each_entry_rcu'
+ - 'hlist_bl_for_each_entry_safe'
+ - 'hlist_for_each'
+ - 'hlist_for_each_entry'
+ - 'hlist_for_each_entry_continue'
+ - 'hlist_for_each_entry_continue_rcu'
+ - 'hlist_for_each_entry_continue_rcu_bh'
+ - 'hlist_for_each_entry_from'
+ - 'hlist_for_each_entry_from_rcu'
+ - 'hlist_for_each_entry_rcu'
+ - 'hlist_for_each_entry_rcu_bh'
+ - 'hlist_for_each_entry_rcu_notrace'
+ - 'hlist_for_each_entry_safe'
+ - 'hlist_for_each_entry_srcu'
+ - '__hlist_for_each_rcu'
+ - 'hlist_for_each_safe'
+ - 'hlist_nulls_for_each_entry'
+ - 'hlist_nulls_for_each_entry_from'
+ - 'hlist_nulls_for_each_entry_rcu'
+ - 'hlist_nulls_for_each_entry_safe'
+ - 'i3c_bus_for_each_i2cdev'
+ - 'i3c_bus_for_each_i3cdev'
+ - 'ide_host_for_each_port'
+ - 'ide_port_for_each_dev'
+ - 'ide_port_for_each_present_dev'
+ - 'idr_for_each_entry'
+ - 'idr_for_each_entry_continue'
+ - 'idr_for_each_entry_continue_ul'
+ - 'idr_for_each_entry_ul'
+ - 'in_dev_for_each_ifa_rcu'
+ - 'in_dev_for_each_ifa_rtnl'
+ - 'inet_bind_bucket_for_each'
+ - 'inet_lhash2_for_each_icsk_rcu'
+ - 'key_for_each'
+ - 'key_for_each_safe'
+ - 'klp_for_each_func'
+ - 'klp_for_each_func_safe'
+ - 'klp_for_each_func_static'
+ - 'klp_for_each_object'
+ - 'klp_for_each_object_safe'
+ - 'klp_for_each_object_static'
+ - 'kunit_suite_for_each_test_case'
+ - 'kvm_for_each_memslot'
+ - 'kvm_for_each_vcpu'
+ - 'list_for_each'
+ - 'list_for_each_codec'
+ - 'list_for_each_codec_safe'
+ - 'list_for_each_continue'
+ - 'list_for_each_entry'
+ - 'list_for_each_entry_continue'
+ - 'list_for_each_entry_continue_rcu'
+ - 'list_for_each_entry_continue_reverse'
+ - 'list_for_each_entry_from'
+ - 'list_for_each_entry_from_rcu'
+ - 'list_for_each_entry_from_reverse'
+ - 'list_for_each_entry_lockless'
+ - 'list_for_each_entry_rcu'
+ - 'list_for_each_entry_reverse'
+ - 'list_for_each_entry_safe'
+ - 'list_for_each_entry_safe_continue'
+ - 'list_for_each_entry_safe_from'
+ - 'list_for_each_entry_safe_reverse'
+ - 'list_for_each_entry_srcu'
+ - 'list_for_each_prev'
+ - 'list_for_each_prev_safe'
+ - 'list_for_each_safe'
+ - 'llist_for_each'
+ - 'llist_for_each_entry'
+ - 'llist_for_each_entry_safe'
+ - 'llist_for_each_safe'
+ - 'mci_for_each_dimm'
+ - 'media_device_for_each_entity'
+ - 'media_device_for_each_intf'
+ - 'media_device_for_each_link'
+ - 'media_device_for_each_pad'
+ - 'nanddev_io_for_each_page'
+ - 'netdev_for_each_lower_dev'
+ - 'netdev_for_each_lower_private'
+ - 'netdev_for_each_lower_private_rcu'
+ - 'netdev_for_each_mc_addr'
+ - 'netdev_for_each_uc_addr'
+ - 'netdev_for_each_upper_dev_rcu'
+ - 'netdev_hw_addr_list_for_each'
+ - 'nft_rule_for_each_expr'
+ - 'nla_for_each_attr'
+ - 'nla_for_each_nested'
+ - 'nlmsg_for_each_attr'
+ - 'nlmsg_for_each_msg'
+ - 'nr_neigh_for_each'
+ - 'nr_neigh_for_each_safe'
+ - 'nr_node_for_each'
+ - 'nr_node_for_each_safe'
+ - 'of_for_each_phandle'
+ - 'of_property_for_each_string'
+ - 'of_property_for_each_u32'
+ - 'pci_bus_for_each_resource'
+ - 'pcl_for_each_chunk'
+ - 'pcl_for_each_segment'
+ - 'pcm_for_each_format'
+ - 'ping_portaddr_for_each_entry'
+ - 'plist_for_each'
+ - 'plist_for_each_continue'
+ - 'plist_for_each_entry'
+ - 'plist_for_each_entry_continue'
+ - 'plist_for_each_entry_safe'
+ - 'plist_for_each_safe'
+ - 'pnp_for_each_card'
+ - 'pnp_for_each_dev'
+ - 'protocol_for_each_card'
+ - 'protocol_for_each_dev'
+ - 'queue_for_each_hw_ctx'
+ - 'radix_tree_for_each_slot'
+ - 'radix_tree_for_each_tagged'
+ - 'rb_for_each'
+ - 'rbtree_postorder_for_each_entry_safe'
+ - 'rdma_for_each_block'
+ - 'rdma_for_each_port'
+ - 'rdma_umem_for_each_dma_block'
+ - 'resource_list_for_each_entry'
+ - 'resource_list_for_each_entry_safe'
+ - 'rhl_for_each_entry_rcu'
+ - 'rhl_for_each_rcu'
+ - 'rht_for_each'
+ - 'rht_for_each_entry'
+ - 'rht_for_each_entry_from'
+ - 'rht_for_each_entry_rcu'
+ - 'rht_for_each_entry_rcu_from'
+ - 'rht_for_each_entry_safe'
+ - 'rht_for_each_from'
+ - 'rht_for_each_rcu'
+ - 'rht_for_each_rcu_from'
+ - '__rq_for_each_bio'
+ - 'rq_for_each_bvec'
+ - 'rq_for_each_segment'
+ - 'scsi_for_each_prot_sg'
+ - 'scsi_for_each_sg'
+ - 'sctp_for_each_hentry'
+ - 'sctp_skb_for_each'
+ - 'shdma_for_each_chan'
+ - '__shost_for_each_device'
+ - 'shost_for_each_device'
+ - 'sk_for_each'
+ - 'sk_for_each_bound'
+ - 'sk_for_each_entry_offset_rcu'
+ - 'sk_for_each_from'
+ - 'sk_for_each_rcu'
+ - 'sk_for_each_safe'
+ - 'sk_nulls_for_each'
+ - 'sk_nulls_for_each_from'
+ - 'sk_nulls_for_each_rcu'
+ - 'snd_array_for_each'
+ - 'snd_pcm_group_for_each_entry'
+ - 'snd_soc_dapm_widget_for_each_path'
+ - 'snd_soc_dapm_widget_for_each_path_safe'
+ - 'snd_soc_dapm_widget_for_each_sink_path'
+ - 'snd_soc_dapm_widget_for_each_source_path'
+ - 'tb_property_for_each'
+ - 'tcf_exts_for_each_action'
+ - 'udp_portaddr_for_each_entry'
+ - 'udp_portaddr_for_each_entry_rcu'
+ - 'usb_hub_for_each_child'
+ - 'v4l2_device_for_each_subdev'
+ - 'v4l2_m2m_for_each_dst_buf'
+ - 'v4l2_m2m_for_each_dst_buf_safe'
+ - 'v4l2_m2m_for_each_src_buf'
+ - 'v4l2_m2m_for_each_src_buf_safe'
+ - 'virtio_device_for_each_vq'
+ - 'while_for_each_ftrace_op'
+ - 'xa_for_each'
+ - 'xa_for_each_marked'
+ - 'xa_for_each_range'
+ - 'xa_for_each_start'
+ - 'xas_for_each'
+ - 'xas_for_each_conflict'
+ - 'xas_for_each_marked'
+ - 'xbc_array_for_each_value'
+ - 'xbc_for_each_key_value'
+ - 'xbc_node_for_each_array_value'
+ - 'xbc_node_for_each_child'
+ - 'xbc_node_for_each_key_value'
+ - 'zorro_for_each_dev'
+
+IncludeBlocks: Preserve # Unknown to clang-format-5.0
+IncludeCategories:
+ - Regex: '.*'
+ Priority: 1
+IncludeIsMainRegex: '(Test)?$'
+IndentCaseLabels: false
+IndentGotoLabels: false
+IndentPPDirectives: None # Unknown to clang-format-5.0
+IndentWidth: 8
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Auto # Unknown to clang-format-5.0
+ObjCBlockIndentWidth: 8
+ObjCSpaceAfterProperty: true
+ObjCSpaceBeforeProtocolList: true
+
+# Taken from git's rules
+PenaltyBreakAssignment: 10 # Unknown to clang-format-4.0
+PenaltyBreakBeforeFirstCallParameter: 30
+PenaltyBreakComment: 10
+PenaltyBreakFirstLessLess: 0
+PenaltyBreakString: 10
+PenaltyExcessCharacter: 100
+PenaltyReturnTypeOnItsOwnLine: 60
+
+PointerAlignment: Right
+ReflowComments: false
+SortIncludes: false
+SortUsingDeclarations: false # Unknown to clang-format-4.0
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCtorInitializerColon: true # Unknown to clang-format-5.0
+SpaceBeforeInheritanceColon: true # Unknown to clang-format-5.0
+SpaceBeforeParens: ControlStatementsExceptForEachMacros
+SpaceBeforeRangeBasedForLoopColon: true # Unknown to clang-format-5.0
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: false
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp03
+TabWidth: 8
+UseTab: Always
+...
diff --git a/.codespellrc b/.codespellrc
new file mode 100644
index 000000000..5def594b2
--- /dev/null
+++ b/.codespellrc
@@ -0,0 +1,3 @@
+[codespell]
+skip = ./.git,./test/pki,./tags,./plugins/amdgpu/amdgpu_drm.h,./plugins/amdgpu/drm.h,./plugins/amdgpu/drm_mode.h
+ignore-words-list = creat,fpr,fle,ue,bord,parms,nd,te,testng,inh,wronly,renderd,bui,clen,sems
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
new file mode 100644
index 000000000..222d66156
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE.md
@@ -0,0 +1,63 @@
+
+
+**Description**
+
+
+
+**Steps to reproduce the issue:**
+1.
+2.
+3.
+
+**Describe the results you received:**
+
+
+**Describe the results you expected:**
+
+
+**Additional information you deem important (e.g. issue happens only occasionally):**
+
+
+**CRIU logs and information:**
+
+
+
+CRIU full dump/restore logs:
+
+
+```
+(paste your output here)
+```
+
+
+
+
+Output of `criu --version`:
+
+
+```
+(paste your output here)
+```
+
+
+
+
+Output of `criu check --all`:
+
+
+```
+(paste your output here)
+```
+
+
+
+
+**Additional environment details:**
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 000000000..62365b191
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,18 @@
+
diff --git a/.github/workflows/aarch64-test.yaml b/.github/workflows/aarch64-test.yaml
new file mode 100644
index 000000000..ebbecadb3
--- /dev/null
+++ b/.github/workflows/aarch64-test.yaml
@@ -0,0 +1,34 @@
+name: aarch64 test
+
+on: [push, pull_request]
+
+# Cancel any preceding run on the pull request.
+concurrency:
+ group: aarch64-test-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+ build:
+ strategy:
+ matrix:
+ os: [ubuntu-24.04-arm, ubuntu-22.04-arm]
+ target: [GCC=1, CLANG=1]
+
+ runs-on: ${{ matrix.os }}
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Run Tests ${{ matrix.target }} on ${{ matrix.os }}
+ # The following tests fail on the VMs:
+ # ./change_mnt_context --pidfile=change_mnt_context.pid --outfile=change_mnt_context.out
+ # 45: ERR: change_mnt_context.c:23: mount (errno = 22 (Invalid argument))
+ #
+ # In combination with '--remote-lazy-pages' the following error occurs:
+ # 138: FAIL: maps05.c:84: Data corrupted at page 1639 (errno = 11 (Resource temporarily unavailable))
+ run: |
+ # The 'sched_policy00' test needs the following:
+ sudo sysctl -w kernel.sched_rt_runtime_us=-1
+ # An /etc/hosts entry is needed for netns_lock_iptables
+ echo "127.0.0.1 localhost" | sudo tee -a /etc/hosts
+ sudo -E make -C scripts/ci local ${{ matrix.target }} RUN_TESTS=1 \
+ ZDTM_OPTS="-x zdtm/static/change_mnt_context -x zdtm/static/maps05"
diff --git a/.github/workflows/alpine-test.yml b/.github/workflows/alpine-test.yml
new file mode 100644
index 000000000..0f5c20f48
--- /dev/null
+++ b/.github/workflows/alpine-test.yml
@@ -0,0 +1,21 @@
+name: "Alpine Test"
+
+on: [push, pull_request]
+
+# One run per pull request/ref; in-progress runs are cancelled except on criu-dev.
+concurrency:
+  group: "alpine-test-${{ github.event.pull_request.number || github.ref }}"
+  cancel-in-progress: "${{ github.ref != 'refs/heads/criu-dev' }}"
+
+jobs:
+  build:
+    strategy:
+      matrix:
+        os: [ubuntu-22.04, ubuntu-22.04-arm]
+        target: [GCC=1, CLANG=1]
+    runs-on: "${{ matrix.os }}"
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: "Run Alpine ${{ matrix.target }} Test"
+        run: sudo -E make -C scripts/ci alpine ${{ matrix.target }}
diff --git a/.github/workflows/archlinux-test.yml b/.github/workflows/archlinux-test.yml
new file mode 100644
index 000000000..425f0662b
--- /dev/null
+++ b/.github/workflows/archlinux-test.yml
@@ -0,0 +1,16 @@
+name: "Arch Linux Test"
+
+on: [push, pull_request]
+
+# One run per pull request/ref; in-progress runs are cancelled except on criu-dev.
+concurrency:
+  group: "archlinux-test-${{ github.event.pull_request.number || github.ref }}"
+  cancel-in-progress: "${{ github.ref != 'refs/heads/criu-dev' }}"
+
+jobs:
+  build:
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+      - name: "Run Arch Linux Test"
+        run: sudo -E make -C scripts/ci archlinux
diff --git a/.github/workflows/check-commits.yml b/.github/workflows/check-commits.yml
new file mode 100644
index 000000000..bf7d06697
--- /dev/null
+++ b/.github/workflows/check-commits.yml
@@ -0,0 +1,31 @@
+name: Verify self-contained commits
+
+on: pull_request
+
+# Cancel any in-progress run for the same pull request when a new one starts
+concurrency:
+ group: commit-test-${{ github.event.pull_request.number }}
+ cancel-in-progress: true
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ # Run only if the pull request does not have the label "not-selfcontained-ok"
+ if: "!contains(github.event.pull_request.labels.*.name, 'not-selfcontained-ok')"
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ # Needed to rebase against the base branch
+ fetch-depth: 0
+ # Checkout pull request HEAD commit instead of merge commit
+ ref: ${{ github.event.pull_request.head.sha }}
+ - name: Install dependencies
+ run: sudo contrib/apt-install libprotobuf-dev libprotobuf-c-dev protobuf-c-compiler protobuf-compiler python3-protobuf libnl-3-dev libnet-dev libcap-dev uuid-dev
+ - name: Configure git user details
+ run: |
+ git config --global user.email "checkpoint-restore@users.noreply.github.com"
+ git config --global user.name "checkpoint-restore"
+ - name: Configure base branch without switching current branch
+ run: git fetch origin ${{ github.base_ref }}:${{ github.base_ref }}
+ - name: Build each commit
+ run: git rebase ${{ github.base_ref }} -x "make -C scripts/ci check-commit"
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
new file mode 100644
index 000000000..9c9e46c1b
--- /dev/null
+++ b/.github/workflows/codeql.yml
@@ -0,0 +1,50 @@
+name: "CodeQL"
+
+on:
+ push:
+ branches: [ "criu-dev", "master" ]
+ pull_request:
+ branches: [ "criu-dev" ]
+ schedule:
+ - cron: "11 6 * * 3"
+
+# Cancel any preceding run on the pull request.
+concurrency:
+ group: codeql-test-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+ analyze:
+ name: Analyze
+ runs-on: ubuntu-latest
+ permissions:
+ actions: read
+ contents: read
+ security-events: write
+
+ strategy:
+ fail-fast: false
+ matrix:
+ language: [ python, cpp ]
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Install Packages (cpp)
+ if: ${{ matrix.language == 'cpp' }}
+ run: |
+ sudo contrib/apt-install protobuf-c-compiler libprotobuf-c-dev libprotobuf-dev build-essential protobuf-compiler python3-protobuf libnet-dev pkg-config libnl-3-dev libbsd0 libbsd-dev iproute2 libcap-dev libaio-dev python3-yaml libnl-route-3-dev gnutls-dev
+ - name: Initialize CodeQL
+ uses: github/codeql-action/init@v3
+ with:
+ languages: ${{ matrix.language }}
+ queries: +security-and-quality
+
+ - name: Autobuild
+ uses: github/codeql-action/autobuild@v3
+
+ - name: Perform CodeQL Analysis
+ uses: github/codeql-action/analyze@v3
+ with:
+ category: "/language:${{ matrix.language }}"
diff --git a/.github/workflows/compat-test.yml b/.github/workflows/compat-test.yml
new file mode 100644
index 000000000..8a64ce185
--- /dev/null
+++ b/.github/workflows/compat-test.yml
@@ -0,0 +1,21 @@
+name: "Compat Tests"
+
+on: [push, pull_request]
+
+# One run per pull request/ref; in-progress runs are cancelled except on criu-dev.
+concurrency:
+  group: "compat-test-${{ github.event.pull_request.number || github.ref }}"
+  cancel-in-progress: "${{ github.ref != 'refs/heads/criu-dev' }}"
+
+jobs:
+  build:
+    runs-on: ubuntu-22.04
+    strategy:
+      matrix:
+        target: [GCC, CLANG]
+
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: "Run Compat Tests (${{ matrix.target }})"
+        run: sudo -E make -C scripts/ci local COMPAT_TEST=y ${{ matrix.target }}=1
diff --git a/.github/workflows/cross-compile-daily.yml b/.github/workflows/cross-compile-daily.yml
new file mode 100644
index 000000000..c709cca00
--- /dev/null
+++ b/.github/workflows/cross-compile-daily.yml
@@ -0,0 +1,22 @@
+name: "Daily Cross Compile Tests"
+
+on:
+  schedule:
+    - cron: "30 12 * * *"
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        target: [armv7-stable-cross, aarch64-stable-cross, ppc64-stable-cross, mips64el-stable-cross, riscv64-stable-cross]
+        branches: [criu-dev, master]
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: "${{ matrix.branches }}"
+      - name: "Run Cross Compilation Targets"
+        run: |
+          sudo make -C scripts/ci ${{ matrix.target }}
diff --git a/.github/workflows/cross-compile.yml b/.github/workflows/cross-compile.yml
new file mode 100644
index 000000000..96672b294
--- /dev/null
+++ b/.github/workflows/cross-compile.yml
@@ -0,0 +1,40 @@
+name: "Cross Compile Tests"
+
+on: [push, pull_request]
+
+# One run per pull request/ref; in-progress runs are cancelled except on criu-dev.
+concurrency:
+  group: "cross-compile-test-${{ github.event.pull_request.number || github.ref }}"
+  cancel-in-progress: "${{ github.ref != 'refs/heads/criu-dev' }}"
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    continue-on-error: "${{ matrix.experimental }}"
+    strategy:
+      fail-fast: false
+      matrix:
+        experimental: [false]
+        target: [
+          armv7-stable-cross,
+          aarch64-stable-cross,
+          ppc64-stable-cross,
+          mips64el-stable-cross,
+          riscv64-stable-cross,
+        ]
+        include:
+          - target: armv7-unstable-cross
+            experimental: true
+          - target: aarch64-unstable-cross
+            experimental: true
+          - target: ppc64-unstable-cross
+            experimental: true
+          - target: mips64el-unstable-cross
+            experimental: true
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: "Run Cross Compilation Targets"
+        run: |
+          sudo make -C scripts/ci ${{ matrix.target }}
diff --git a/.github/workflows/docker-test.yml b/.github/workflows/docker-test.yml
new file mode 100644
index 000000000..23696905a
--- /dev/null
+++ b/.github/workflows/docker-test.yml
@@ -0,0 +1,19 @@
+name: "Docker Test"
+
+on: [push, pull_request]
+
+# One run per pull request/ref; in-progress runs are cancelled except on criu-dev.
+concurrency:
+  group: "docker-test-${{ github.event.pull_request.number || github.ref }}"
+  cancel-in-progress: "${{ github.ref != 'refs/heads/criu-dev' }}"
+
+jobs:
+  build:
+    runs-on: "${{ matrix.os }}"
+    strategy:
+      matrix:
+        os: [ubuntu-22.04]
+    steps:
+      - uses: actions/checkout@v4
+      - name: "Run Docker Test (${{ matrix.os }})"
+        run: sudo make -C scripts/ci docker-test
diff --git a/.github/workflows/fedora-asan-test.yml b/.github/workflows/fedora-asan-test.yml
new file mode 100644
index 000000000..02dc9a1b3
--- /dev/null
+++ b/.github/workflows/fedora-asan-test.yml
@@ -0,0 +1,17 @@
+name: "Fedora ASAN Test"
+
+on: [push, pull_request]
+
+# One run per pull request/ref; in-progress runs are cancelled except on criu-dev.
+concurrency:
+  group: "fedora-asan-test-${{ github.event.pull_request.number || github.ref }}"
+  cancel-in-progress: "${{ github.ref != 'refs/heads/criu-dev' }}"
+
+jobs:
+  build:
+    runs-on: ubuntu-22.04
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: "Run Fedora ASAN Test"
+        run: sudo -E make -C scripts/ci fedora-asan
diff --git a/.github/workflows/fedora-rawhide-test.yml b/.github/workflows/fedora-rawhide-test.yml
new file mode 100644
index 000000000..83e2ead82
--- /dev/null
+++ b/.github/workflows/fedora-rawhide-test.yml
@@ -0,0 +1,21 @@
+name: Fedora Rawhide Test
+
+on: [push, pull_request]
+
+# Cancel any preceding run on the pull request.
+concurrency:
+ group: fedora-rawhide-test-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+ build:
+ runs-on: ubuntu-22.04
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Run Fedora Rawhide Test
+ # We need to pass environment variables from the CI environment to
+ # distinguish between CI environments. However, we need to make sure that
+ # XDG_RUNTIME_DIR environment variable is not set due to a bug in Podman.
+ # FIXME: https://github.com/containers/podman/issues/14920
+ run: sudo -E XDG_RUNTIME_DIR= make -C scripts/ci fedora-rawhide CONTAINER_RUNTIME=podman BUILD_OPTIONS="--security-opt seccomp=unconfined"
diff --git a/.github/workflows/gcov-test.yml b/.github/workflows/gcov-test.yml
new file mode 100644
index 000000000..cc4e1d44a
--- /dev/null
+++ b/.github/workflows/gcov-test.yml
@@ -0,0 +1,21 @@
+name: Coverage Tests
+
+on: [push, pull_request]
+
+# Cancel any preceding run on the pull request.
+concurrency:
+ group: gcov-test-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+ build:
+ runs-on: ubuntu-22.04
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Run Coverage Tests
+ run: sudo -E make -C scripts/ci local GCOV=1
+ - name: Run gcov
+ run: sudo -E find . -name '*gcda' -type f -print0 | sudo -E xargs --null --max-args 128 --max-procs 4 gcov
+ - name: Run Coverage Analysis
+ run: sudo -E make codecov
diff --git a/.github/workflows/java-test.yml b/.github/workflows/java-test.yml
new file mode 100644
index 000000000..cbd3c1f23
--- /dev/null
+++ b/.github/workflows/java-test.yml
@@ -0,0 +1,16 @@
+name: Java Test
+
+on: [push, pull_request]
+
+# Cancel any preceding run on the pull request.
+concurrency:
+ group: java-test-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+ build:
+ runs-on: ubuntu-22.04
+ steps:
+ - uses: actions/checkout@v4
+ - name: Run Java Test
+ run: sudo make -C scripts/ci java-test
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 000000000..f7da4f6f6
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,40 @@
+name: Run code linter
+
+on: [push, pull_request]
+
+# Cancel any preceding run on the pull request.
+concurrency:
+ group: lint-test-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ container:
+ image: registry.fedoraproject.org/fedora:latest
+ steps:
+ - name: Install tools
+ run: sudo dnf -y install git make ruff xz clang-tools-extra codespell git-clang-format ShellCheck
+
+ - uses: actions/checkout@v4
+
+ - name: Set git safe directory
+ # https://github.com/actions/checkout/issues/760
+ run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
+
+ - name: Run make lint
+ run: make lint
+
+ - name: Run make indent
+ continue-on-error: true
+ run: |
+ if [ -z "${{github.base_ref}}" ]; then
+ git fetch --deepen=1
+ make indent
+ else
+ git fetch origin ${{github.base_ref}}
+ make indent BASE=origin/${{github.base_ref}}
+ fi
+ - name: Raise in-line make indent warnings
+ run: |
+ git diff | ./scripts/github-indent-warnings.py
diff --git a/.github/workflows/loongarch64-qemu-test.yml b/.github/workflows/loongarch64-qemu-test.yml
new file mode 100644
index 000000000..d7c554c87
--- /dev/null
+++ b/.github/workflows/loongarch64-qemu-test.yml
@@ -0,0 +1,15 @@
+name: LoongArch64 Qemu Test
+
+on: [push, pull_request]
+
+# Cancel any preceding run on the pull request.
+concurrency:
+ group: loongarch64-qemu-test-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+ build:
+ runs-on: ubuntu-22.04
+ steps:
+ - uses: actions/checkout@v4
+ - run: sudo make -C scripts/ci loongarch64-qemu-test
diff --git a/.github/workflows/manage-labels.yml b/.github/workflows/manage-labels.yml
new file mode 100644
index 000000000..a2bcd8860
--- /dev/null
+++ b/.github/workflows/manage-labels.yml
@@ -0,0 +1,14 @@
+name: Remove labels
+on: [issue_comment, pull_request_review_comment]
+jobs:
+ remove-labels-on-comments:
+ name: Remove labels on comments
+ if: github.event_name == 'issue_comment'
+ runs-on: ubuntu-latest
+ steps:
+ - uses: mondeja/remove-labels-gh-action@v1
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ labels: |
+ changes requested
+ awaiting reply
diff --git a/.github/workflows/nftables-test.yml b/.github/workflows/nftables-test.yml
new file mode 100644
index 000000000..7a7d8bd30
--- /dev/null
+++ b/.github/workflows/nftables-test.yml
@@ -0,0 +1,24 @@
+name: Nftables based testing
+
+on: [push, pull_request]
+
+# Cancel any preceding run on the pull request.
+concurrency:
+ group: nftables-test-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+ build:
+ runs-on: ubuntu-24.04
+ steps:
+ - uses: actions/checkout@v4
+ - name: Remove iptables
+ run: sudo apt remove -y iptables
+ - name: Install libnftables-dev
+ run: sudo contrib/apt-install libnftables-dev
+ - name: chmod 755 /home/runner
+ # CRIU's tests are sometimes running as some random user and need
+ # to be able to access the test files.
+ run: sudo chmod 755 /home/runner
+ - name: Build with nftables network locking backend
+ run: sudo make -C scripts/ci local COMPILE_FLAGS="NETWORK_LOCK_DEFAULT=NETWORK_LOCK_NFTABLES"
diff --git a/.github/workflows/podman-test.yml b/.github/workflows/podman-test.yml
new file mode 100644
index 000000000..a07edbe5b
--- /dev/null
+++ b/.github/workflows/podman-test.yml
@@ -0,0 +1,16 @@
+name: Podman Test
+
+on: [push, pull_request]
+
+# Cancel any preceding run on the pull request.
+concurrency:
+ group: podman-test-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+ build:
+ runs-on: ubuntu-22.04
+ steps:
+ - uses: actions/checkout@v4
+ - name: Run Podman Test
+ run: sudo make -C scripts/ci podman-test
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
new file mode 100644
index 000000000..76d55d4c9
--- /dev/null
+++ b/.github/workflows/stale.yml
@@ -0,0 +1,27 @@
+name: Mark stale issues and pull requests
+
+# Please refer to https://github.com/actions/stale/blob/master/action.yml
+# to see all config knobs of the stale action.
+
+on:
+ schedule:
+ - cron: "0 0 * * *"
+
+jobs:
+ stale:
+
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/stale@v5
+ with:
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
+ stale-issue-message: 'A friendly reminder that this issue had no activity for 30 days.'
+ stale-pr-message: 'A friendly reminder that this PR had no activity for 30 days.'
+ stale-issue-label: 'stale-issue'
+ stale-pr-label: 'stale-pr'
+ days-before-stale: 30
+ days-before-close: 365
+ remove-stale-when-updated: true
+ exempt-pr-labels: 'no-auto-close'
+ exempt-issue-labels: 'no-auto-close,new feature,enhancement'
diff --git a/.github/workflows/stream-test.yml b/.github/workflows/stream-test.yml
new file mode 100644
index 000000000..76bd96edf
--- /dev/null
+++ b/.github/workflows/stream-test.yml
@@ -0,0 +1,17 @@
+name: CRIU Image Streamer Test
+
+on: [push, pull_request]
+
+# Cancel any preceding run on the pull request.
+concurrency:
+ group: stream-test-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+ build:
+ runs-on: ubuntu-22.04
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Run CRIU Image Streamer Test
+ run: sudo -E make -C scripts/ci local STREAM_TEST=1
diff --git a/.github/workflows/x86-64-clang-test.yml b/.github/workflows/x86-64-clang-test.yml
new file mode 100644
index 000000000..1f0a469bd
--- /dev/null
+++ b/.github/workflows/x86-64-clang-test.yml
@@ -0,0 +1,16 @@
+name: X86_64 CLANG Test
+
+on: [push, pull_request]
+
+# Cancel any preceding run on the pull request.
+concurrency:
+ group: clang-test-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+ build:
+ runs-on: ubuntu-22.04
+ steps:
+ - uses: actions/checkout@v4
+ - name: Run X86_64 CLANG Test
+ run: sudo make -C scripts/ci x86_64 CLANG=1
diff --git a/.github/workflows/x86-64-gcc-test.yml b/.github/workflows/x86-64-gcc-test.yml
new file mode 100644
index 000000000..15e84a0df
--- /dev/null
+++ b/.github/workflows/x86-64-gcc-test.yml
@@ -0,0 +1,16 @@
+name: X86_64 GCC Test
+
+on: [push, pull_request]
+
+# Cancel any preceding run on the pull request.
+concurrency:
+ group: gcc-test-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
+
+jobs:
+ build:
+ runs-on: ubuntu-22.04
+ steps:
+ - uses: actions/checkout@v4
+ - name: Run X86_64 GCC Test
+ run: sudo make -C scripts/ci x86_64
diff --git a/.gitignore b/.gitignore
index c231104af..94daa13ea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,25 +20,16 @@ compel/compel
compel/compel-host-bin
images/*.c
images/*.h
-images/google/protobuf/*.c
-images/google/protobuf/*.h
.gitid
criu/criu
-crit/crit
-criu/arch/*/sys-exec-tbl*.c
-# x86 syscalls-table is not generated
-!criu/arch/x86/sys-exec-tbl.c
-criu/arch/*/syscalls*.S
-criu/include/syscall-codes*.h
-criu/include/syscall*.h
+criu/unittest/unittest
criu/include/version.h
criu/pie/restorer-blob.h
criu/pie/parasite-blob.h
criu/protobuf-desc-gen.h
lib/build/
lib/c/criu.pc
-scripts/build/qemu-user-static/*
-lib/.crit-setup.files
compel/include/asm
include/common/asm
include/common/config.h
+build/**
diff --git a/.lgtm.yml b/.lgtm.yml
new file mode 100644
index 000000000..4beadcc63
--- /dev/null
+++ b/.lgtm.yml
@@ -0,0 +1,25 @@
+extraction:
+ cpp:
+ prepare:
+ packages:
+ - "protobuf-c-compiler"
+ - "libprotobuf-c-dev"
+ - "libprotobuf-dev"
+ - "build-essential"
+ - "libprotobuf-dev"
+ - "libprotobuf-c-dev"
+ - "protobuf-c-compiler"
+ - "protobuf-compiler"
+ - "python3-protobuf"
+ - "libnet-dev"
+ - "pkg-config"
+ - "libnl-3-dev"
+ - "libbsd0"
+ - "libbsd-dev"
+ - "iproute2"
+ - "libcap-dev"
+ - "libaio-dev"
+ - "libbsd-dev"
+ - "python3-yaml"
+ - "libnl-route-3-dev"
+ - "gnutls-dev"
diff --git a/.mailmap b/.mailmap
index d8c3f594d..8076f0bc9 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1,6 +1,10 @@
Stanislav Kinsbursky
Pavel Emelyanov
-Andrey Vagin
-Andrey Vagin
-Andrey Vagin Andrew Vagin
+Andrei Vagin
+Andrei Vagin
+Andrei Vagin
+Andrei Vagin
+Andrei Vagin
Cyrill Gorcunov
+Alexander Mikhalitsyn
+Alexander Mikhalitsyn
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 82ba9fbc8..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-language: c
-sudo: required
-dist: xenial
-cache: ccache
-services:
- - docker
-env:
- - TR_ARCH=local
- - TR_ARCH=local CLANG=1
- - TR_ARCH=local COMPAT_TEST=y
- - TR_ARCH=local CLANG=1 COMPAT_TEST=y
- - TR_ARCH=alpine
- - TR_ARCH=fedora-asan
- - TR_ARCH=x86_64
- - TR_ARCH=x86_64 CLANG=1
- - TR_ARCH=armv7hf
- - TR_ARCH=aarch64
- - TR_ARCH=ppc64le
- - TR_ARCH=s390x
- - TR_ARCH=armv7hf CLANG=1
- - TR_ARCH=aarch64 CLANG=1
- - TR_ARCH=ppc64le CLANG=1
- - TR_ARCH=alpine CLANG=1
- - TR_ARCH=docker-test
- - TR_ARCH=fedora-rawhide
- - TR_ARCH=fedora-rawhide-aarch64
- - TR_ARCH=centos
- - TR_ARCH=podman-test
-matrix:
- allow_failures:
- - env: TR_ARCH=docker-test
- - env: TR_ARCH=fedora-rawhide
- - env: TR_ARCH=fedora-rawhide-aarch64
- - env: TR_ARCH=s390x
- - env: TR_ARCH=local GCOV=1
- - env: TR_ARCH=local COMPAT_TEST=y
- - env: TR_ARCH=local CLANG=1 COMPAT_TEST=y
-script:
- - sudo make CCACHE=1 -C scripts/travis $TR_ARCH
-after_success:
- - ccache -s
- - make -C scripts/travis after_success
-group: deprecated-2017Q2
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 120000
index 000000000..e3c5a92d9
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1 @@
+GEMINI.md
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 000000000..03875639d
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,417 @@
+## How to contribute to CRIU
+
+The CRIU project is (almost) a never-ending story, because we always have to keep up with the
+Linux kernel supporting checkpoint and restore for all the features it provides. Thus we're
+looking for contributors of all kinds -- feedback, bug reports, testing, coding, writing, etc.
+Here are some useful hints to get involved.
+
+* We have both -- [very simple](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks;
+* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting);
+* Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles;
+* Feedback is expected on the GitHub issues page and on the [mailing list](https://lore.kernel.org/criu);
+* We accept GitHub pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [CRIU development mailing list](https://lore.kernel.org/criu).
+Below we describe the recommended practices for CRIU development in more detail.
+* Spread the word about CRIU in [social networks](http://criu.org/Contacts);
+* If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events);
+
+### Setting up the development environment
+
+Although `criu` can be run as non-root (see [Security](https://criu.org/Security)), development is best done as root. For example, some tests require root. So, it is a good idea to set up a recent Linux distribution on a virtual machine.
+
+### Get the source code
+
+The CRIU sources are tracked by Git. Official CRIU repo is at https://github.com/checkpoint-restore/criu.
+
+The repository may contain multiple branches. Development happens in the **criu-dev** branch.
+
+To clone CRIU repo and switch to the proper branch, run:
+
+```
+git clone https://github.com/checkpoint-restore/criu criu
+cd criu
+git checkout criu-dev
+```
+
+### Building from source
+
+Follow these steps to compile CRIU from source code.
+
+#### Installing build dependencies
+
+First, you need to install the required build dependencies. We provide scripts to simplify this process for several Linux distributions in [contrib/dependencies](contrib/dependencies). For a complete list of dependencies, please refer to the [installation guide](https://criu.org/Installation).
+
+##### On Ubuntu/Debian-based systems:
+
+```
+./contrib/dependencies/apt-packages.sh
+```
+
+##### On Fedora/CentOS-based systems:
+
+```
+./contrib/dependencies/dnf-packages.sh
+```
+
+##### Using Nix:
+
+```
+nix develop
+```
+
+#### Compiling CRIU
+
+Once the dependencies are installed, you can compile CRIU by running the `make` command from the root of the source directory:
+
+```
+make
+```
+
+This should create the `./criu/criu` executable.
+
+## Edit the source code
+
+When you change the source code, please keep in mind the following code conventions:
+
+* code is written to be read, so the code readability is the most important thing you need to have in mind when preparing patches
+* we prefer tabs, and indentation that is 8 characters wide
+* we prefer line length of 80 characters or less, more is allowed if it helps with code readability
+* CRIU mostly follows [Linux kernel coding style](https://www.kernel.org/doc/Documentation/process/coding-style.rst), but we are less strict than the kernel community
+
+Other conventions can be learned from the source code itself. In short, make sure your new code looks similar to what is already there.
+
+## Automatic tools to fix coding-style
+
+Important: These tools are there to advise you, but should not be considered as a "source of truth", as tools also make nasty mistakes from time to time which can completely break code readability.
+
+The following command can be used to automatically run a code linter for Python files (ruff), Shell scripts (shellcheck),
+text spelling (codespell), and a number of CRIU-specific checks (usage of print macros and EOL whitespace for C files).
+
+```
+make lint
+```
+
+In addition, we have adopted a [clang-format configuration file](https://www.kernel.org/doc/Documentation/process/clang-format.rst)
+based on the kernel source tree. However, compliance with the clang-format autoformat rules is optional. If the automatic code formatting
+results in decreased readability, we may choose to ignore these errors.
+
+Run the following command to check if your changes are compliant with the clang-format rules:
+
+```
+make indent
+```
+
+This command is built upon the `git-clang-format` tool and supports two options `BASE` and `OPTS`. The `BASE` option allows you to
+specify a range of commits to check for coding style issues. By default, it is set to `HEAD~1`, so that only the last commit is checked.
+If you are developing on top of the criu-dev branch and want to check all your commits for compliance with the clang-format rules, you
+can use `BASE=origin/criu-dev`. The `OPTS` option can be used to pass additional options to `git-clang-format`. For example, if you want
+to check the last *N* commits for formatting errors, without applying the changes to the codebase you can use the following command.
+
+```
+make indent OPTS=--diff BASE=HEAD~N
+```
+
+Note that for pull requests, the "Run code linter" workflow runs these checks for all commits. If a clang-format error is detected
+we need to review the suggested changes and decide if they should be fixed before merging.
+
+Here are some bad examples of clang-format-ing:
+
+* if clang-format tries to force 120 characters and breaks readability - it is wrong:
+
+```
+@@ -58,8 +59,7 @@ static int register_membarriers(void)
+ }
+
+ if (!all_ok) {
+- fail("can't register membarrier()s - tried %#x, kernel %#x",
+- barriers_registered, barriers_supported);
++ fail("can't register membarrier()s - tried %#x, kernel %#x", barriers_registered, barriers_supported);
+ return -1;
+ }
+```
+
+* if clang-format breaks your beautiful readability friendly alignment in structures, comments or defines - it is wrong:
+
+```
+--- a/test/zdtm/static/membarrier.c
++++ b/test/zdtm/static/membarrier.c
+@@ -27,9 +27,10 @@ static const struct {
+ int register_cmd;
+ int execute_cmd;
+ } membarrier_cmds[] = {
+- { "", MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, MEMBARRIER_CMD_PRIVATE_EXPEDITED },
+- { "_SYNC_CORE", MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE },
+- { "_RSEQ", MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ },
++ { "", MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, MEMBARRIER_CMD_PRIVATE_EXPEDITED },
++ { "_SYNC_CORE", MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE,
++ MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE },
++ { "_RSEQ", MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ },
+ };
+```
+
+## Test your changes
+
+CRIU comes with an extensive test suite. To check whether your changes introduce any regressions, run
+
+```
+make test
+```
+
+The command runs [ZDTM Test Suite](https://criu.org/ZDTM_Test_Suite). Check for any error messages produced by it.
+
+## Describe your changes
+
+Describe your problem. Whether your change is a one-line bug fix or
+5000 lines of a new feature, there must be an underlying problem that
+motivated you to do this work. Convince the reviewer that there is a
+problem worth fixing and that it makes sense for them to read past the
+first paragraph.
+
+Once the problem is established, describe what you are actually doing
+about it in technical detail. It's important to describe the change
+in plain English for the reviewer to verify that the code is behaving
+as you intend it to.
+
+Solve only one problem per commit. If your description starts to get
+long, that's a sign that you probably need to split up your commit.
+See [Separate your changes](#separate-your-changes).
+
+Describe your changes in imperative mood, e.g. "make xyzzy do frotz"
+instead of "[This commit] makes xyzzy do frotz" or "[I] changed xyzzy
+to do frotz", as if you are giving orders to the codebase to change
+its behaviour.
+
+If your change fixes a bug in a specific commit, e.g. you found an issue using
+`git bisect`, please use the `Fixes:` tag with the abbreviation of
+the SHA-1 ID, and the one line summary. For example:
+
+```
+Fixes: 9433b7b9db3e ("make: use cflags/ldflags for config.h detection mechanism")
+```
+
+The following `git config` settings can be used to add a pretty format for
+outputting the above style in the `git log` or `git show` commands:
+
+```
+[pretty]
+ fixes = Fixes: %h (\"%s\")
+```
+
+If your change addresses an issue listed on GitHub, please use the `Fixes:` tag with the number of the issue. For instance:
+
+```
+Fixes: #339
+```
+
+The `Fixes:` tags should be put at the end of the detailed description.
+
+Please add a prefix to your commit subject line describing the part of the
+project your change is related to. This can be either the name of the file or
+directory you changed, or just a general word. If your patch is touching
+multiple components you may separate prefixes with "/"-es. Here are some good
+examples of subject lines from git log:
+
+```
+criu-ns: Convert to python3 style print() syntax
+compel: Calculate sh_addr if not provided by linker
+style: Enforce kernel style -Wstrict-prototypes
+rpc/libcriu: Add lsm-profile option
+```
+
+You may refer to the [How to Write a Git Commit
+Message](https://chris.beams.io/posts/git-commit/) article for
+recommendations on writing good commit messages.
+
+## Separate your changes
+
+Separate each **logical change** into a separate commit.
+
+For example, if your changes include both bug fixes and performance
+enhancements for a single driver, separate those changes into two
+or more commits. If your changes include an API update, and a new
+driver which uses that new API, separate those into two commits.
+
+On the other hand, if you make a single change to numerous files,
+group those changes into a single commit. Thus a single logical change
+is contained within a single commit.
+
+The point to remember is that each commit should make an easily understood
+change that can be verified by reviewers. Each commit should be justifiable
+on its own merits.
+
+When dividing your change into a series of commits, take special care to
+ensure that CRIU builds and runs properly after each commit in the
+series. Developers using `git bisect` to track down a problem can end up
+splitting your patch series at any point; they will not thank you if you
+introduce bugs in the middle.
+
+## Sign your work
+
+To improve tracking of who did what, we ask you to sign off the commits in
+your fork of CRIU or the patches that are to be emailed.
+
+The sign-off is a simple line at the end of the explanation for the
+patch, which certifies that you wrote it or otherwise have the right to
+pass it on as an open-source patch. The rules are pretty simple: if you
+can certify the below:
+
+### Developer's Certificate of Origin 1.1
+ By making a contribution to this project, I certify that:
+
+ (a) The contribution was created in whole or in part by me and I
+ have the right to submit it under the open source license
+ indicated in the file; or
+
+ (b) The contribution is based upon previous work that, to the best
+ of my knowledge, is covered under an appropriate open source
+ license and I have the right under that license to submit that
+ work with modifications, whether created in whole or in part
+ by me, under the same open source license (unless I am
+ permitted to submit under a different license), as indicated
+ in the file; or
+
+ (c) The contribution was provided directly to me by some other
+ person who certified (a), (b) or (c) and I have not modified
+ it.
+
+ (d) I understand and agree that this project and the contribution
+ are public and that a record of the contribution (including all
+ personal information I submit with it, including my sign-off) is
+ maintained indefinitely and may be redistributed consistent with
+ this project or the open source license(s) involved.
+
+then you just add a line saying
+
+```
+Signed-off-by: Random J Developer
+```
+
+using your real name (please, no pseudonyms or anonymous contributions if
+it is possible).
+
+Hint: you can use `git commit -s` to add a Signed-off-by line to your
+commit message. To append such a line to a commit you already made, use
+`git commit --amend -s`.
+
+```
+ From: Random J Developer
+Subject: [PATCH] component: Short patch description
+
+Long patch description (could be skipped if patch
+is trivial enough)
+
+Signed-off-by: Random J Developer
+---
+Patch body here
+```
+
+## Submit your work upstream
+
+We accept GitHub pull requests and this is the preferred way to contribute to CRIU.
+For that you should push your work to your fork of CRIU at [GitHub](https://github.com) and create a [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)
+
+### Pull request guidelines
+
+Pull request comment should contain description of the problem your changes
+solve and a brief outline of the changes included in the pull request.
+
+Please avoid pushing fixup commits to an existing pull request. Each commit
+should be self-contained and there should not be fixup commits in a patch
+series. Pull requests that contain one commit which breaks something
+and another commit which fixes it, will be rejected.
+
+Please merge the fixup commits into the commits that introduced the
+problem before creating a pull request.
+
+It may happen that the reviewers were not completely happy with your
+changes and requested changes to your patches. After you updated your
+changes please close the old pull request and create a new one that
+contains the following:
+
+* Description of the problem your changes solve and a brief outline of the
+ changes
+* Link to the previous version of the pull request
+* Brief description of the changes between old and new versions of the pull
+ request. If there were more than one previous pull request, all the
+ revisions should be listed. For example:
+
+```
+v3: rebase on the current criu-dev
+v2: add commit to foo() and update bar() coding style
+```
+
+If there are only minor updates to the commits in a pull request, it is
+possible to force-push them into an existing pull request. This only applies
+to small changes and should be used with care. If you update an existing
+pull request, remember to add the description of the changes from the
+previous version.
+
+### Mailing list submission
+
+Historically, CRIU worked with mailing lists and patches so if you still prefer this way continue reading till the end of this section.
+
+### Make a patch
+
+To create a patch, run
+
+```
+git format-patch --signoff origin/criu-dev
+```
+
+You might need to read the Git documentation on how to prepare patches
+for mail submission. Take a look at http://book.git-scm.com/ and/or
+http://git-scm.com/documentation for details. It should not be hard
+at all.
+
+We recommend posting patches using `git send-email`:
+
+```
+git send-email --cover-letter --no-chain-reply-to --annotate \
+ --confirm=always --to=criu@lists.linux.dev criu-dev
+```
+
+Note that the `git send-email` subcommand may not be in
+the main git package and using it may require installation of a
+separate package, for example the "git-email" package in Fedora and
+Debian.
+
+If this is your first time using git send-email, you might need to
+configure it to point it to your SMTP server with something like:
+
+```
+git config --global sendemail.smtpServer smtp.example.net
+```
+
+If you get tired of typing `--to=criu@lists.linux.dev` all the time,
+you can configure that to be automatically handled as well:
+
+```
+git config sendemail.to criu@lists.linux.dev
+```
+
+If a developer is sending another version of the patch (e.g. to address
+review comments), they are advised to note differences to previous versions
+after the `---` line in the patch so that it helps reviewers but
+doesn't become part of git history. Moreover, such patch needs to be prefixed
+correctly with `--subject-prefix=PATCHv2` appended to
+`git send-email` (substitute `v2` with the correct
+version if needed though).
+
+### Mail patches
+
+The patches should be sent to the CRIU development mailing list, `criu AT lists.linux.dev`. Note that you need to be subscribed first in order to post. The list web interface is available at https://lore.kernel.org/criu; you can also use standard mailman aliases to work with it.
+
+Please make sure the email client you're using doesn't screw your patch (line wrapping and so on).
+
+> **Note:** When sending a patch set that consists of more than one patch, please, push your changes in your local repo and provide the URL of the branch in the cover-letter
+
+### Wait for response
+
+Be patient. Most CRIU developers are pretty busy people so if
+there is no immediate response on your patch — don't be surprised,
+sometimes a patch may fly around a week before it gets reviewed.
+
+## Continuous integration
+
+Wiki article: [Continuous integration](https://criu.org/Continuous_integration)
+
+CRIU tests are run for each series sent to the mailing list. If you get a message from our patchwork that patches failed to pass the tests, you have to investigate what is wrong.
diff --git a/Documentation/HOWTO.cross-compile b/Documentation/HOWTO.cross-compile
index f1b17842b..44b19dfea 100644
--- a/Documentation/HOWTO.cross-compile
+++ b/Documentation/HOWTO.cross-compile
@@ -1,4 +1,10 @@
-This HOWTO explains how to cross-compile CRIU on x86
+How to cross-compile CRIU on x86:
+
+Use the Dockerfile provided:
+ scripts/build/Dockerfile.armv7-cross
+
+Historical guide on how to do it without a Docker container:
+[Unsupported, may not work anymore!]
1. Download the protobuf sources.
2. Apply the patch http://16918.selcdn.ru/crtools/aarch64/0001-protobuf-added-the-support-for-the-acrchitecture-AAr.patch
diff --git a/Documentation/Makefile b/Documentation/Makefile
index cbc7ff2c8..de0cc448d 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -12,7 +12,9 @@ endif
FOOTER := footer.txt
SRC1 += crit.txt
+SRC1 += criu-ns.txt
SRC1 += compel.txt
+SRC1 += criu-amdgpu-plugin.txt
SRC8 += criu.txt
SRC := $(SRC1) $(SRC8)
XMLS := $(patsubst %.txt,%.xml,$(SRC))
@@ -54,7 +56,7 @@ ifneq ($(USE_ASCIIDOCTOR),)
$(Q) $(ASCIIDOC) -b manpage -d manpage -o $@ $<
else
$(Q) $(ASCIIDOC) -b docbook -d manpage -o $(patsubst %.1,%.xml,$@) $<
- $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.1,%.xml,$@) 2>/dev/null
+ $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.1,%.xml,$@)
endif
%.8: %.txt $(FOOTER) custom.xsl
@@ -63,7 +65,7 @@ ifneq ($(USE_ASCIIDOCTOR),)
$(Q) $(ASCIIDOC) -b manpage -d manpage -o $@ $<
else
$(Q) $(ASCIIDOC) -b docbook -d manpage -o $(patsubst %.8,%.xml,$@) $<
- $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.8,%.xml,$@) 2>/dev/null
+ $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.8,%.xml,$@)
endif
%.ps: %.1
diff --git a/Documentation/compel.txt b/Documentation/compel.txt
index 744a3b35d..506228f59 100644
--- a/Documentation/compel.txt
+++ b/Documentation/compel.txt
@@ -86,18 +86,21 @@ Infecting code
~~~~~~~~~~~~~~
The parasitic code is compiled and converted to a header using *compel*, and included here.
-*#include *
+*#include *
*#include "parasite.h"*
-Following steps are perfomed to infect the victim process:
+Following steps are performed to infect the victim process:
- stop the task: *int compel_stop_task(int pid);*
- prepare infection handler: *struct parasite_ctl *compel_prepare(int pid);*
- execute system call: *int compel_syscall(ctl, int syscall_nr, long *ret, int arg ...);*
- infect victim: *int compel_infect(ctl, nr_thread, size_of_args_area);*
- cure the victim: *int compel_cure(ctl);* //ctl pointer is freed by this call
- - Resume victim: *int compel_resume_task(pid, orig_state, state);*
+ - Resume victim: *int compel_resume_task(pid, orig_state, state)* or
+ *int compel_resume_task_sig(pid, orig_state, state, stop_signo).*
+ //compel_resume_task_sig() could be used in case when victim is in stopped state.
+ stop_signo could be read by calling compel_parse_stop_signo().
*ctl* must be configured with blob information by calling *PREFIX_setup_c_header()*, with ctl as its argument.
*PREFIX* is the argument given to *-p* when calling hgen, else it is deduced from file name.
diff --git a/Documentation/criu-amdgpu-plugin.txt b/Documentation/criu-amdgpu-plugin.txt
new file mode 100644
index 000000000..fe76fc3bc
--- /dev/null
+++ b/Documentation/criu-amdgpu-plugin.txt
@@ -0,0 +1,114 @@
+ROCM Support(1)
+===============
+
+NAME
+----
+criu-amdgpu-plugin - A plugin extension to CRIU to support checkpoint/restore in
+userspace for AMD GPUs.
+
+
+CURRENT SUPPORT
+---------------
+Single and Multi GPU systems (Gfx9)
+Checkpoint / Restore on different system
+Checkpoint / Restore inside a docker container
+Pytorch
+Tensorflow
+Using CRIU Image Streamer
+Parallel Restore
+
+DESCRIPTION
+-----------
+Though *criu* is a great tool for checkpointing and restoring running
+applications, it has certain limitations such as it cannot handle
+applications that have device files open. In order to support *ROCm* based
+workloads with *criu* we need to augment criu's core functionality with a
+plugin based extension mechanism. *criu-amdgpu-plugin* provides the necessary support
+to criu to allow Checkpoint / Restore with ROCm.
+
+
+Dependencies
+------------
+*amdkfd support*::
+ In order to snapshot the *VRAM* and other *GPU* device states, we require
+ an updated version of amdkfd(amdgpu) driver.
+
+OPTIONS
+-------
+Optional parameters can be passed in as environment variables before
+executing criu command.
+
+*KFD_FW_VER_CHECK*::
+ Enable or disable firmware version check.
+ If enabled, the firmware version on the restored GPU needs to be greater than
+ or equal to the firmware version on the checkpointed GPU. Default:Enabled
+
+ E.g:
+ KFD_FW_VER_CHECK=0
+
+*KFD_SDMA_FW_VER_CHECK*::
+ Enable or disable SDMA firmware version check.
+ If enabled, the SDMA firmware version on the restored GPU needs to be greater
+ than or equal to the SDMA firmware version on the checkpointed GPU. Default:Enabled
+
+ E.g:
+ KFD_SDMA_FW_VER_CHECK=0
+
+*KFD_CACHES_COUNT_CHECK*::
+ Enable or disable caches count check. If enabled, the caches count on the
+ restored GPU needs to be greater than or equal to the caches count on the
+ checkpointed GPU. Default:Enabled
+
+ E.g:
+ KFD_CACHES_COUNT_CHECK=0
+
+*KFD_NUM_GWS_CHECK*::
+ Enable or disable num_gws check. If enabled, the num_gws on the
+ restored GPU needs to be greater than or equal to the num_gws on the
+ checkpointed GPU. Default:Enabled
+
+ E.g:
+ KFD_NUM_GWS_CHECK=0
+
+*KFD_VRAM_SIZE_CHECK*::
+ Enable or disable VRAM size check. If enabled, the VRAM size on the
+ restored GPU needs to be greater than or equal to the VRAM size on the
+ checkpointed GPU. Default:Enabled
+
+ E.g:
+ KFD_VRAM_SIZE_CHECK=0
+
+*KFD_NUMA_CHECK*::
+ Enable or disable NUMA CPU region check. If enabled, the plugin will restore
+ GPUs that belong to one CPU NUMA region to the same CPU NUMA region.
+ Default:Enabled
+
+ E.g:
+ KFD_NUMA_CHECK=1
+
+*KFD_CAPABILITY_CHECK*::
+ Enable or disable capability check. If enabled, the capability on
+ restored GPU needs to be equal to the capability on the checkpointed GPU.
+ Default:Enabled
+
+ E.g:
+ KFD_CAPABILITY_CHECK=1
+
+*KFD_MAX_BUFFER_SIZE*::
+ On some systems, VRAM sizes may exceed RAM sizes, and so buffers for dumping
+ and restoring VRAM may be unable to fit. Set to a nonzero value (in bytes)
+ to set a limit on the plugin's memory usage.
+ Default:0 (Disabled)
+
+ E.g:
+ KFD_MAX_BUFFER_SIZE="2G"
+
+
+AUTHOR
+------
+The AMDKFD team.
+
+
+COPYRIGHT
+---------
+Copyright \(C) 2020-2021, Advanced Micro Devices, Inc. (AMD)
diff --git a/Documentation/criu-ns.txt b/Documentation/criu-ns.txt
new file mode 100644
index 000000000..c6594a9bc
--- /dev/null
+++ b/Documentation/criu-ns.txt
@@ -0,0 +1,32 @@
+CRIU-NS(1)
+==========
+include::footer.txt[]
+
+NAME
+----
+criu-ns - run criu in different namespaces
+
+SYNOPSIS
+--------
+*criu-ns* 'dump' -t PID []
+
+*criu-ns* 'pre-dump' -t PID []
+
+*criu-ns* 'restore' []
+
+*criu-ns* 'check' []
+
+DESCRIPTION
+-----------
+The *criu-ns* command executes 'criu' in a new PID and mount namespace.
+The purpose of this wrapper script is to enable restoring a process tree
+that might require a specific PID that is already used on the system;
+the so-called "PID mismatch" problem.
+
+SEE ALSO
+--------
+nsenter(1) namespaces(7) criu(8)
+
+AUTHOR
+------
+The CRIU team
diff --git a/Documentation/criu.txt b/Documentation/criu.txt
index 94fc5428a..0c9a9e527 100644
--- a/Documentation/criu.txt
+++ b/Documentation/criu.txt
@@ -24,8 +24,8 @@ on a different system, or both.
OPTIONS
-------
-Most of the true / false long options (the ones without arguments) can be
-prefixed with *--no-* to negate the option (example: *--display-stats*
+Most of the long flags can be
+prefixed with *no-* to negate the option (example: *--display-stats*
and *--no-display-stats*).
Common options
@@ -33,12 +33,11 @@ Common options
Common options are applicable to any 'command'.
*-v*[*v*...], *--verbosity*::
- Increase verbosity up from the default level. Multiple *v* can be used,
- each increasing verbosity by one level. Using long option without argument
- increases verbosity by one level.
+ Increase verbosity up from the default level. In case of short option,
+ multiple *v* can be used, each increasing verbosity by one.
-*-v*'num', *--verbosity*='num'::
- Set verbosity level to 'num'. The higher the level, the more output
+**-v**__num__, **--verbosity=**__num__::
+ Set verbosity level to _num_. The higher the level, the more output
is produced.
+
The following levels are available:
@@ -57,26 +56,31 @@ The following levels are available:
Pass a specific configuration file to criu.
*--no-default-config*::
- Forbid parsing of default configuration files.
+ Disable parsing of default configuration files.
*--pidfile* 'file'::
Write root task, service or page-server pid into a 'file'.
*-o*, *--log-file* 'file'::
- Write logging messages to 'file'.
+ Write logging messages to a 'file'.
*--display-stats*::
- During dump as well as during restore *criu* collects information
- like the time required to dump or restore the process or the
+ During dump, as well as during restore, *criu* collects some statistics,
+ like the time required to dump or restore the process, or the
number of pages dumped or restored. This information is always
- written to the files 'stats-dump' and 'stats-restore' and can
- be easily displayed using *crit*. The option *--display-stats*
- additionally prints out this information on the console at the end
- of a dump or a restore.
+ saved to the *stats-dump* and *stats-restore* files, and can
+ be shown using *crit*(1). The option *--display-stats*
+ prints out this information on the console at the end
+ of a dump or restore operation.
*-D*, *--images-dir* 'path'::
Use 'path' as a base directory where to look for sets of image files.
+*--stream*::
+ Dump/restore images using criu-image-streamer.
+ See https://github.com/checkpoint-restore/criu-image-streamer for detailed
+ usage.
+
*--prev-images-dir* 'path'::
Use 'path' as a parent directory where to look for sets of image files.
This option makes sense in case of incremental dumps.
@@ -91,6 +95,19 @@ The following levels are available:
*-L*, *--libdir* 'path'::
Path to plugins directory.
+*--enable-fs* ['fs'[,'fs'...]]::
+ Specify a comma-separated list of filesystem names that should
+ be auto-detected. The value 'all' enables auto-detection for
+ all filesystems.
++
+Note: This option is not safe, use at your own risk.
+Auto-detecting a filesystem mount assumes that the mountpoint can
+be restored with *mount(src, mountpoint, flags, options)*. When used,
+*dump* is expected to always succeed if a mountpoint is to be
+auto-detected, however *restore* may fail (or do something wrong)
+if the assumption for restore logic is incorrect. This option is
+not compatible with *--external* *dev*.
+
*--action-script* 'script'::
Add an external action script to be executed at certain stages.
The environment variable *CRTOOLS_SCRIPT_ACTION* is available
@@ -138,6 +155,17 @@ The following levels are available:
notification message contains a file descriptor for
the master pty
+ *query-ext-files*:::
+ called after the process tree is stopped and network is locked.
+ This hook is used only in the RPC mode. The notification reply
+ contains file ids to be added to external file list (may be empty).
+
+*--unprivileged*::
+ This option tells *criu* to accept the limitations when running
+ as non-root. Running as non-root requires *criu* at least to have
+ *CAP_SYS_ADMIN* or *CAP_CHECKPOINT_RESTORE*. For details about running
+ *criu* as non-root please consult the *NON-ROOT* section.
+
*-V*, *--version*::
Print program version and exit.
@@ -156,6 +184,12 @@ In addition, *page-server* options may be specified.
Turn on memory changes tracker in the kernel. If the option is
not passed the memory tracker get turned on implicitly.
+*--pre-dump-mode*='mode'::
+ The pre-dump algorithm can operate in one of two modes. The 'splice' mode
+ is parasite based, whereas 'read' mode is based on process_vm_readv
+ syscall. The 'read' mode incurs reduced frozen time and reduced
+ memory pressure as compared to 'splice' mode. Default is 'splice' mode.
+
*dump*
~~~~~~
Performs a checkpoint procedure.
@@ -179,7 +213,7 @@ In other words, do not use it unless really needed.
*-s*, *--leave-stopped*::
Leave tasks in stopped state after checkpoint, instead of killing.
-*--external* 'type'*[*'id'*]:*'value'::
+*--external* __type__**[**__id__**]:**__value__::
Dump an instance of an external resource. The generic syntax is
'type' of resource, followed by resource 'id' (enclosed in literal
square brackets), and optional 'value' (prepended by a literal colon).
@@ -188,35 +222,48 @@ In other words, do not use it unless really needed.
Note to restore external resources, either *--external* or *--inherit-fd*
is used, depending on resource type.
-*--external mnt[*'mountpoint'*]:*'name'::
+*--external* **mnt[**__mountpoint__**]:**__name__::
Dump an external bind mount referenced by 'mountpoint', saving it
to image under the identifier 'name'.
-*--external mnt[]:*'flags'::
+*--external* **mnt[]:**__flags__::
Dump all external bind mounts, autodetecting those. Optional 'flags'
can contain *m* to also dump external master mounts, *s* to also
dump external shared mounts (default behavior is to abort dumping
if such mounts are found). If 'flags' are not provided, colon
is optional.
-*--external dev[*'major'*/*'minor'*]:*'name'::
+*--external* **dev[**__major__**/**__minor__**]:**__name__::
Allow to dump a mount namespace having a real block device mounted.
A block device is identified by its 'major' and 'minor' numbers,
and *criu* saves its information to image under the identifier 'name'.
-*--external file[*'mnt_id'*:*'inode'*]*::
+*--external* **file[**__mnt_id__**:**__inode__**]**::
Dump an external file, i.e. an opened file that is can not be resolved
from the current mount namespace, which can not be dumped without using
this option. The file is identified by 'mnt_id' (a field obtained from
- */proc/*'pid'*/fdinfo/*'N') and 'inode' (as returned by *stat*(2)).
+ **/proc/**__pid__**/fdinfo/**__N__) and 'inode' (as returned by
+ *stat*(2)).
-*--external tty[*'rdev'*:*'dev'*]*::
+*--external* **tty[**__rdev__**:**__dev__**]**::
Dump an external TTY, identified by *st_rdev* and *st_dev* fields
returned by *stat*(2).
-*--external unix[*'id'*]*::
+*--external* **unix[**__id__**]**::
Tell *criu* that one end of a pair of UNIX sockets (created by
- *socketpair*(2)) with 'id' is OK to be disconnected.
+ *socketpair*(2)) with the given _id_ is OK to be disconnected.
+
+*--external* **net[**__inode__**]:**__name__::
+ Mark a network namespace as external and do not include it in the
+ checkpoint. The label 'name' can be used with *--inherit-fd* during
+ restore to specify a file descriptor to a preconfigured network
+ namespace.
+
+*--external* **pid[**__inode__**]:**__name__::
+ Mark a PID namespace as external. This can be later used to restore
+ a process into an existing PID namespace. The label 'name' can be
+ used to assign another PID namespace during restore with the help
+ of *--inherit-fd*.
*--freeze-cgroup*::
Use cgroup freezer to collect processes.
@@ -266,14 +313,42 @@ For example, the command line for the above example should look like this:
discovered automatically (usually via */proc*). This option is
useful when one needs *criu* to skip some controllers.
-*--cgroup-props-ignore-default*::
- When combined with *--cgroup-props*, makes *criu* substitute
- a predefined controller property with the new one shipped. If the option
- is not used, the predefined properties are merged with the provided ones.
+*--cgroup-yard* 'path'::
+ Instead of trying to mount cgroups in CRIU, provide a path to a directory
+ with already created cgroup yard. Useful if you don't want to grant
+ CAP_SYS_ADMIN to CRIU. For every cgroup mount there should be exactly one
+ directory. If there is only one controller in this mount, the dir's name
+ should be just the name of the controller. If there are multiple controllers
+ comounted, the directory name should have them be separated by a comma.
++
+For example, if */proc/cgroups* looks like this:
++
+----------
+#subsys_name hierarchy num_cgroups enabled
+cpu 1 1 1
+devices 2 2 1
+freezer 2 2 1
+----------
++
+then you can create the cgroup yard by the following commands:
++
+----------
+mkdir private_yard
+cd private_yard
+mkdir cpu
+mount -t cgroup -o cpu none cpu
+mkdir devices,freezer
+mount -t cgroup -o devices,freezer none devices,freezer
+----------
*--tcp-established*::
Checkpoint established TCP connections.
+*--tcp-close*::
+ Don't dump the state of, or block, established TCP connections
+ (including connections that were once established but are now closed).
+ This is useful when tcp connections are not going to be restored.
+
*--skip-in-flight*::
This option skips in-flight TCP connections. If any TCP connections
that are not yet completely established are found, *criu* ignores
@@ -303,6 +378,10 @@ For example, the command line for the above example should look like this:
Allows to link unlinked files back, if possible (modifies filesystem
during *restore*).
+*--timeout* 'number'::
+ Set a time limit in seconds for collecting tasks during the
+ dump operation. The timeout is 10 seconds by default.
+
*--ghost-limit* 'size'::
Set the maximum size of deleted file to be carried inside image.
By default, up to 1M file is allowed. Using this
@@ -310,6 +389,13 @@ For example, the command line for the above example should look like this:
'size' may be postfixed with a *K*, *M* or *G*, which stands for kilo-,
mega, and gigabytes, accordingly.
+*--ghost-fiemap*::
+ Enable an optimization based on fiemap ioctl that can reduce the
+ number of system calls used when checkpointing highly sparse ghost
+ files. This option is enabled by default, and it can be disabled
+ with *--no-ghost-fiemap*. An automatic fallback to SEEK_HOLE/SEEK_DATA
+ is used when fiemap is not supported.
+
*-j*, *--shell-job*::
Allow one to dump shell jobs. This implies the restored task will
inherit session and process group ID from the *criu* itself.
@@ -347,22 +433,78 @@ By default the option is set to *fpu* and *ins*.
option is intended for post-copy (lazy) migration and should be
used in conjunction with *restore* with appropriate options.
+*--file-validation* ['mode']::
+ Set the method to be used to validate open files. Validation is done
+ to ensure that the version of the file being restored is the same
+ version as when it was dumped.
++
+The 'mode' may be one of the following:
+
+ *filesize*:::
+ To explicitly use only the file size check all the time.
+ This is the fastest and least intensive check.
+
+ *buildid*:::
+ To validate ELF files with their build-ID. If the
+ build-ID cannot be obtained, 'chksm-first' method will be
+ used. This is the default if mode is unspecified.
+
+*--network-lock* ['mode']::
+ Set the method to be used for network locking/unlocking. Locking is done
+ to ensure that tcp packets are dropped between dump and restore. This is
+ done to avoid the kernel sending RST when a packet arrives destined for
+ the dumped process.
++
+The 'mode' may be one of the following:
+
+ *iptables*::: Use iptables rules to drop the packets.
+ This is the default if 'mode' is not specified.
+
+ *nftables*::: Use nftables rules to drop the packets.
+
+ *skip*::: Don't lock the network. If *--tcp-close* is not used, the network
+ must be locked externally to allow CRIU to dump TCP connections.
+
+*--allow-uprobes*::
+ Allow dumping when uprobes vma is present. When used on dump, this option is
+ required on restore as well.
+
+ A uprobes vma is automatically created by the kernel once a uprobe is
+ triggered. This mapping is not removed even once the uprobe is deleted. So,
+ even if a process once had uprobes attached to it, and they're removed by
+ the time the process is dumped, this option is still required because criu
+ has no way of knowing whether there are active uprobes or not.
+
+ When using this option on restore, make sure the uprobes (if any) active on
+ the dumped processes are still active. Otherwise, when execution reaches
+ a uprobe'd location in any of the restored processes, that process will be
+ sent a SIGTRAP.
+
+ As an example, say a uprobe is set at function foo in the executable of the
+ process p_bar. Whenever execution in p_bar reaches function foo, the uprobe
+ is triggered. If the uprobe has been triggered at least once, then the kernel
+ will have created the uprobes vma. To dump p_bar, this option is
+ necessary. After dumping, say the uprobe is deleted. Now, on restoring with
+ this option, once execution reaches function foo, SIGTRAP will be sent to
+ the restored p_bar. Unless it has a signal handler installed for SIGTRAP,
+ it will be terminated and core dumped.
+
*restore*
~~~~~~~~~
Restores previously checkpointed processes.
-*--inherit-fd* *fd[*'N'*]:*'resource'::
+*--inherit-fd* **fd[**__N__**]:**__resource__::
Inherit a file descriptor. This option lets *criu* use an already opened
file descriptor 'N' for restoring a file identified by 'resource'.
This option can be used to restore an external resource dumped
- with the help of *--external* *file*, *tty*, and *unix* options.
+ with the help of *--external* *file*, *tty*, *pid* and *unix* options.
+
The 'resource' argument can be one of the following:
+
- - *tty[*'rdev'*:*'dev'*]*
- - *pipe[*'inode'*]*
- - *socket[*'inode'*]*
- - *file[*'mnt_id'*:*'inode'*]*
+ - **tty[**__rdev__**:**__dev__**]**
+ - **pipe:[**__inode__**]**
+ - **socket:[**__inode__**]**
+ - **file[**__mnt_id__**:**__inode__**]**
- 'path/to/file'
+
@@ -385,8 +527,10 @@ usually need to be escaped from shell.
*-r*, *--root* 'path'::
Change the root filesystem to 'path' (when run in a mount namespace).
+ This option is required to restore a mount namespace. The directory
+ 'path' must be a mount point and its parent must not be overmounted.
-*--external* 'type'*[*'id'*]:*'value'::
+*--external* __type__**[**__id__**]:**__value__::
Restore an instance of an external resource. The generic syntax is
'type' of resource, followed by resource 'id' (enclosed in literal
square brackets), and optional 'value' (prepended by a literal colon).
@@ -396,7 +540,7 @@ usually need to be escaped from shell.
the help of *--external* *file*, *tty*, and *unix* options), option
*--inherit-fd* should be used.
-*--external mnt[*'name'*]:*'mountpoint'::
+*--external* **mnt[**__name__**]:**__mountpoint__::
Restore an external bind mount referenced in the image by 'name',
bind-mounting it from the host 'mountpoint' to a proper mount point.
@@ -404,26 +548,36 @@ usually need to be escaped from shell.
Restore all external bind mounts (dumped with the help of
*--external mnt[]* auto-detection).
-*--external dev[*'name'*]:*'/dev/path'::
+*--external* **dev[**__name__**]:**__/dev/path__::
Restore an external mount device, identified in the image by 'name',
using the existing block device '/dev/path'.
-*--external veth[*'inner_dev'*]:*'outer_dev'*@*'bridge'::
+*--external* **veth[**__inner_dev__**]:**__outer_dev__**@**__bridge__::
Set the outer VETH device name (corresponding to 'inner_dev' being
- restored) to 'outer_dev'. If optional *@*'bridge' is specified,
+ restored) to 'outer_dev'. If optional **@**_bridge_ is specified,
'outer_dev' is added to that bridge. If the option is not used,
'outer_dev' will be autogenerated by the kernel.
-*--external macvlan[*'inner_dev'*]:*'outer_dev'::
+*--external* **macvlan[**__inner_dev__**]:**__outer_dev__::
When restoring an image that have a MacVLAN device in it, this option
must be used to specify to which 'outer_dev' (an existing network device
in CRIU namespace) the restored 'inner_dev' should be bound to.
+*-J*, *--join-ns* **NS**:{**PID**|**NS_FILE**}[,**EXTRA_OPTS**]::
+ Restore process tree inside an existing namespace. The namespace can
+ be specified in 'PID' or 'NS_FILE' path format (example:
+ *--join-ns net:12345* or *--join-ns net:/foo/bar*). Currently supported
+ values for **NS** are: *ipc*, *net*, *time*, *user*, and *uts*.
+ This option doesn't support joining a PID namespace, however, this is
+ possible using *--external* and *--inherit-fd*. 'EXTRA_OPTS' is optional
+ and can be used to specify UID and GID for user namespace (e.g.,
+ *--join-ns user:PID,UID,GID*).
+
*--manage-cgroups* ['mode']::
Restore cgroups configuration associated with a task from the image.
Controllers are always restored in an optimistic way -- if already present
in system, *criu* reuses it, otherwise it will be created.
-
++
The 'mode' may be one of the following:
*none*::: Do not restore cgroup properties but require cgroup to
@@ -433,7 +587,7 @@ The 'mode' may be one of the following:
*soft*::: Restore cgroup properties if only cgroup has been created
by *criu*, otherwise do not restore properties. This is the
- default if mode is unspecified.
+ default if mode is unspecified.
*full*::: Always restore all cgroups and their properties.
@@ -442,6 +596,11 @@ The 'mode' may be one of the following:
*ignore*::: Don't deal with cgroups and pretend that they don't exist.
+*--cgroup-yard* 'path'::
+ Instead of trying to mount cgroups in CRIU, provide a path to a directory
+ with already created cgroup yard. For more information look in the *dump*
+ section.
+
*--cgroup-root* ['controller'*:*]/'newroot'::
Change the root cgroup the controller will be installed into. No controller
means that root is the default for all controllers not specified.
@@ -454,16 +613,38 @@ The 'mode' may be one of the following:
*--tcp-close*::
Restore connected TCP sockets in closed state.
-*--veth-pair* 'IN'*=*'OUT'::
+*--veth-pair* __IN__**=**__OUT__::
Correspondence between outside and inside names of veth devices.
*-l*, *--file-locks*::
Restore file locks from the image.
-*--lsm-profile* 'type'*:*'name'::
- Specify an LSM profile to be used during restore. The `type` can be
+*--lsm-profile* __type__**:**__name__::
+ Specify an LSM profile to be used during restore. The _type_ can be
either *apparmor* or *selinux*.
+*--lsm-mount-context* 'context'::
+ Specify a new mount context to be used during restore.
++
+This option will only replace existing mount context information
+with the one specified with this option. Mounts without the
+'context=' option will not be changed.
++
+If a mountpoint has been checkpointed with an option like
+
+ context="system_u:object_r:container_file_t:s0:c82,c137"
++
+it is possible to change this option using
+
+ --lsm-mount-context "system_u:object_r:container_file_t:s0:c204,c495"
++
+which will result in the mountpoint being restored
+with the new 'context='.
++
+This option is useful when using *selinux* and the *selinux*
+labels need to be changed on restore, for example when a container is
+restored into an existing Pod.
+
*--auto-dedup*::
As soon as a page is restored it get punched out from image.
@@ -516,6 +697,29 @@ are not adequate, but this can be suppressed by using *--cpu-cap=none*.
restored process.
This option requires running *lazy-pages* daemon.
+*--file-validation* ['mode']::
+ Set the method to be used to validate open files. Validation is done
+ to ensure that the version of the file being restored is the same
+ version as when it was dumped.
++
+The 'mode' may be one of the following:
+
+ *filesize*:::
+ To explicitly use only the file size check all the time.
+ This is the fastest and least intensive check.
+
+ *buildid*:::
+ To validate ELF files with their build-ID. If the
+ build-ID cannot be obtained, 'chksm-first' method will be
+ used. This is the default if mode is unspecified.
+
+*--skip-file-rwx-check*::
+ Skip checking file permissions (r/w/x for u/g/o) on restore.
+
+*--allow-uprobes*::
+ Required when dumped with this option. Refer to this option in the section
+ on dumping for more details.
+
*check*
~~~~~~~
Checks whether the kernel supports the features needed by *criu* to
@@ -526,17 +730,17 @@ check* always checks Category 1 features unless *--feature* is specified
which only checks a specified feature.
*Category 1*::: Absolutely required. These are features like support for
- */proc/PID/map_files*, *NETLINK_SOCK_DIAG* socket
- monitoring, */proc/sys/kernel/ns_last_pid* etc.
+ */proc/PID/map_files*, *NETLINK_SOCK_DIAG* socket
+ monitoring, */proc/sys/kernel/ns_last_pid* etc.
*Category 2*::: Required only for specific cases. These are features
- like AIO remap, */dev/net/tun* and others that are only
- required if a process being dumped or restored
- is using those.
+ like AIO remap, */dev/net/tun* and others that are only
+ required if a process being dumped or restored
+ is using those.
*Category 3*::: Experimental. These are features like *task-diag* that
- are used for experimental purposes (mostly
- during development).
+ are used for experimental purposes (mostly
+ during development).
If there are no errors or warnings, *criu* prints "Looks good." and its
exit code is 0.
@@ -722,6 +926,42 @@ configuration file will overwrite all other configuration file settings
or RPC options. *This can lead to undesired behavior of criu and
should only be used carefully.*
+NON-ROOT
+--------
+*criu* can be used as non-root with either the *CAP_SYS_ADMIN* capability
+or with the *CAP_CHECKPOINT_RESTORE* capability introduced in Linux kernel 5.9.
+*CAP_CHECKPOINT_RESTORE* is the minimum that is required.
+
+*criu* also needs either *CAP_SYS_PTRACE* or a value of 0 in
+*/proc/sys/kernel/yama/ptrace_scope* (see *ptrace*(2)) to be able to interrupt
+the process for dumping.
+
+Running *criu* as non-root has many limitations and depending on the process
+to checkpoint and restore it may not be possible.
+
+In addition to *CAP_CHECKPOINT_RESTORE* it is possible to give *criu* additional
+capabilities to enable additional features in non-root mode.
+
+Currently *criu* can benefit from the following additional capabilities:
+
+ - *CAP_NET_ADMIN*
+ - *CAP_SYS_CHROOT*
+ - *CAP_SETUID*
+ - *CAP_SYS_RESOURCE*
+
+Note that for some operations, having a capability in a namespace other than
+the init namespace (i.e. the default/root namespace) is not sufficient. For
+example, in order to read symlinks in /proc/[pid]/map_files CRIU requires
+CAP_CHECKPOINT_RESTORE in the init namespace; having CAP_CHECKPOINT_RESTORE
+while running in another user namespace (e.g. in a container) does not allow
+CRIU to read symlinks in /proc/[pid]/map_files.
+
+Without access to /proc/[pid]/map_files checkpointing/restoring processes
+that have mapped deleted files may not be possible.
+
+Independent of the capabilities it is always necessary to use "*--unprivileged*" to
+accept *criu*'s limitations in non-root mode.
+
EXAMPLES
--------
To checkpoint a program with pid of *1234* and write all image files into
diff --git a/Documentation/logo.svg b/Documentation/logo.svg
new file mode 100644
index 000000000..f713e72b7
--- /dev/null
+++ b/Documentation/logo.svg
@@ -0,0 +1,136 @@
+
+
+
+
+
+
+
diff --git a/GEMINI.md b/GEMINI.md
new file mode 100644
index 000000000..e56c1de12
--- /dev/null
+++ b/GEMINI.md
@@ -0,0 +1,136 @@
+# CRIU (Checkpoint/Restore In User-space)
+
+CRIU is a tool for saving the state of a running application to a set of files
+(checkpointing) and restoring it back to a live state. It is primarily used for
+live migration of containers, in-place updates, and fast application startup.
+
+It is implemented as a command-line tool called `criu`. The two primary commands
+are `dump` and `restore`.
+
+- `dump`: Saves a process tree and all its related resources (file
+ descriptors, IPC, sockets, namespaces, etc.) into a collection of image
+ files.
+- `restore`: Restores processes from image files to the same state they were
+ in before the dump.
+
+## Quick Start
+
+To get a feel for `criu`, you can try checkpointing and restoring a simple
+process.
+
+1. **Run a simple process:**
+ Open a terminal and run a command that will run for a while. Find its PID.
+ ```bash
+ sleep 1000 &
+ [1] 12345
+ ```
+
+2. **Dump the process:**
+ As root, use `criu dump` with the process ID (`-t`) and a directory for the
+ image files (`-D`).
+ ```bash
+ sudo criu dump -t 12345 -D /tmp/sleep_images -v4 --shell-job
+ ```
+ The `sleep` process will no longer be running.
+
+3. **Restore the process:**
+ Use `criu restore` to bring the process back to life from the images.
+ ```bash
+ sudo criu restore -D /tmp/sleep_images -v4 --shell-job
+ ```
+ The `sleep` process will be running again as if nothing happened.
+
+# For Developers and Contributors
+
+This section contains more technical details about CRIU's internals and
+development process.
+
+## Dump Process
+
+On dump, CRIU uses available kernel interfaces to collect information about
+processes. For properties that can only be retrieved from within the process
+itself, CRIU injects a binary blob (called a "parasite") into the process's
+address space and executes it in the context of one of the process's threads.
+This injection is handled by a subproject called **Compel**.
+
+## Restore Process
+
+On restore, CRIU reads the image files to reconstruct the processes. The goal is
+to restore them to the exact state they were in before the dump. The restore
+process is divided into several stages (defined as `CR_STATE_*` in
+`./criu/include/restorer.h`).
+
+The main `criu` process acts as a coordinator. It first restores resources with
+inter-process dependencies (file descriptors, sockets, shared memory,
+namespaces, etc.). It then forks the process tree and sets up namespaces.
+Finally, it restores process-specific resources like file descriptors and memory
+mappings.
+
+A key step involves a small, self-contained binary called the "restorer". All
+restored processes switch to executing this code, which unmaps the CRIU-specific
+memory and restores the application's original memory mappings. On the final
+step, the restorer calls `sigreturn` on a prepared signal frame to resume the
+process with the state it had at the moment of the dump.
+
+## Compel
+
+Compel is a subproject responsible for generating the binary blobs used for the
+parasite code (for dumping) and the restorer code (for restoring). It provides a
+library for injecting and executing this code within the target process's
+address space. It is a separate project because the logic for generating and
+injecting Position-Independent Executable (PIE) code is complex and
+self-contained.
+
+## Coding Style
+
+The C code in the CRIU project follows the
+[Linux Kernel Coding Style](https://www.kernel.org/doc/html/latest/process/coding-style.html).
+Here are some of the main points:
+
+- **Indentation**: Use tabs, which are set to 8 characters.
+- **Line Length**: The preferred line limit is 80 characters, but it can be
+ extended to 120 if it improves code readability.
+- **Braces**:
+ - The opening brace for a function goes on a new line.
+ - The opening brace for a block (like `if`, `for`, `while`, `switch`) goes
+ on the same line.
+- **Spaces**: Use spaces around operators (`+`, `-`, `*`, `/`, `%`, `<`, `>`,
+ `=`, etc.).
+- **Naming**: Use descriptive names for functions and variables.
+- **Comments**: Use C-style comments (`/* ... */`). For multi-line comments,
+ the preferred format is:
+ ```c
+ /*
+ * This is a multi-line
+ * comment.
+ */
+ ```
+
+## Code Layout
+
+The code is organized into the following directories:
+
+- `./compel`: The Compel sub-project.
+- `./criu`: The main `criu` tool source code.
+- `./images`: Protobuf descriptions for the image files.
+- `./test`: All tests.
+- `./test/zdtm`: The Zero-Downtime Migration (ZDTM) test suite.
+- `./test/zdtm.py`: The executor script for ZDTM tests.
+- `./scripts`: Helper scripts.
+- `./scripts/build`: Docker image files used for CI and cross-compilation
+ checks.
+- `./crit`: A tool to inspect and manipulate CRIU image files.
+- `./soccr`: A library for TCP socket checkpoint/restore.
+
+## Tests
+
+The main test suite is ZDTM. Here is an example of how to run a single test:
+
+```bash
+sudo ./test/zdtm.py run -t zdtm/static/env00
+```
+
+Each ZDTM test has three stages: preparation, C/R, and results checks. During
+the test, a process calls `test_daemon()` to signal it is ready for C/R, then
+calls `test_waitsig()` to wait for the C/R stage to complete. After being
+restored, the test checks that all its resources are still in a valid state.
diff --git a/INSTALL.md b/INSTALL.md
index d786d06eb..af0702518 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -1,11 +1,31 @@
+## Building CRIU from source code
+
+First, you need to install compile-time dependencies. Check [Installation dependencies](https://criu.org/Installation#Dependencies) for more info.
+
+To compile CRIU, run:
+```
+make
+```
+This should create the `./criu/criu` executable.
+
+To change the default behaviour of CRIU, the following variables can be passed
+to the make command:
+
+ * **NETWORK_LOCK_DEFAULT**, can be set to one of the following
+ values: `NETWORK_LOCK_IPTABLES`, `NETWORK_LOCK_NFTABLES`,
+ `NETWORK_LOCK_SKIP`. CRIU defaults to `NETWORK_LOCK_IPTABLES`
+ if nothing is specified. If another network locking backend is
+ needed, `make` can be called like this:
+ `make NETWORK_LOCK_DEFAULT=NETWORK_LOCK_NFTABLES`
+
## Installing CRIU from source code
Once CRIU is built one can easily setup the complete CRIU package
(which includes executable itself, CRIT tool, libraries, manual
and etc) simply typing
-
- make install
-
+```
+make install
+```
this command accepts the following variables:
* **DESTDIR**, to specify global root where all components will be placed under (empty by default);
@@ -16,17 +36,17 @@ this command accepts the following variables:
* **LIBDIR**, to specify directory where to put libraries (guess the correct path by default).
Thus one can type
-
- make DESTDIR=/some/new/place install
-
+```
+make DESTDIR=/some/new/place install
+```
and get everything installed under `/some/new/place`.
## Uninstalling CRIU
To clean up previously installed CRIU instance one can type
-
- make uninstall
-
+```
+make uninstall
+```
and everything should be removed. Note though that if some variable (**DESTDIR**, **BINDIR**
and such) has been used during installation procedure, the same *must* be passed with
uninstall action.
diff --git a/MAINTAINERS b/MAINTAINERS
new file mode 100644
index 000000000..8fee8e571
--- /dev/null
+++ b/MAINTAINERS
@@ -0,0 +1,8 @@
+Pavel Emelyanov (chief)
+Andrey Vagin
+Mike Rapoport
+Dmitry Safonov <0x7f454c46@gmail.com>
+Adrian Reber
+Pavel Tikhomirov
+Radostin Stoyanov
+Alexander Mikhalitsyn
diff --git a/MAINTAINERS_GUIDE.md b/MAINTAINERS_GUIDE.md
new file mode 100644
index 000000000..5de8e6cb6
--- /dev/null
+++ b/MAINTAINERS_GUIDE.md
@@ -0,0 +1,136 @@
+## Introduction
+
+Dear maintainer. Thank you for investing the time and energy to help
+make CRIU as useful as possible. Maintaining a project is difficult,
+sometimes unrewarding work. Sure, you will contribute cool features
+to the project, but most of your time will be spent reviewing patches,
+cleaning things up, documenting, answering questions, justifying design
+decisions - while everyone else will just have fun! But remember -- the
+quality of the maintainers' work is what distinguishes the good projects
+from the great. So please be proud of your work, even the unglamorous
+parts, and encourage a culture of appreciation and respect for *every*
+aspect of improving the project -- not just the hot new features.
+
+Being a maintainer is a time consuming commitment and should not be
+taken lightly. This document is a manual for maintainers old and new.
+It explains what is expected of maintainers, how they should work, and
+what tools are available to them.
+
+This is a living document - if you see something out of date or missing,
+speak up!
+
+## What are a maintainer's responsibilities?
+
+Part of a healthy project is to have active maintainers to support the
+community in contributions and perform tasks to keep the project running.
+It is every maintainer's responsibility to:
+
+ * Keep the community a friendly place
+ * Deliver prompt feedback and decisions on pull requests and mailing
+ list threads
+ * Encourage other members to help each other, especially in cases the
+ maintainer is overloaded or feels the lack of needed expertise
+ * Make sure the changes made respect the philosophy, design and
+ roadmap of the project
+
+## How are decisions made?
+
+CRIU is an open-source project with an open design philosophy. This
+means that the repository is the source of truth for EVERY aspect of the
+project. *If it's part of the project, it's in the repo. It's in the
+repo, it's part of the project.*
+
+All decisions affecting CRIU, big and small, follow the same 3 steps:
+
+ * Submit a change. Anyone can do this
+
+ * Discuss it. Anyone can and is encouraged to do this
+
+ * Accept or decline it. Only maintainers do this
+
+*I'm a maintainer, should I make pull requests / send patches too?*
+
+Yes. Nobody should ever push to the repository directly. All changes
+should be made through submitting (and accepting) the change.
+
+### Two-steps decision making ###
+
+Since CRIU is an extremely complex piece of software, we try doubly hard
+not to make mistakes that would be hard to fix in the future. In order
+to facilitate this, the "final" decision is made in two stages:
+
+ * We definitely want to try something out
+
+ * We think that the attempt was successful
+
+Respectively, new features get accepted first into the *criu-dev* branch and
+after they have been validated they are merged into the *master* branch. Yet,
+urgent bug fixes may land directly in the master branch. If a change in
+the criu-dev branch is considered to be bad (whatever it means), then it
+can be reverted without propagation to the master branch. Reverting from
+the master branch is expected not to happen at all, but if such an
+extraordinary case occurs, the impact of this step, especially the question
+of backward compatibility, should be considered in the most careful manner.
+
+## Who decides what?
+
+All decisions can be expressed as changes to the repository (either in the
+form of pull requests, or patches sent to the mailing list), and maintainers
+make decisions by merging or rejecting them. Review and approval or
+disagreement can be done by anyone and is denoted by adding a respective
+comment in the pull request. However, merging the change into either branch
+only happens after approvals from maintainers.
+
+In order for a patch to be merged into the criu-dev branch at least two
+maintainers should accept it. In order for a patch to be merged into the
+master branch the majority of maintainers should decide that (then prepare
+a pull request, submit it, etc.).
+
+Overall the maintainer system works because of mutual respect across the
+maintainers of the project. The maintainers trust one another to make
+decisions in the best interests of the project. Sometimes maintainers
+can disagree and this is part of a healthy project to represent the points
+of view of various people. In the case where maintainers cannot find
+agreement on a specific change the role of a Chief Maintainer comes into
+play.
+
+### Chief maintainer
+
+The chief maintainer for the project is responsible for overall architecture
+of the project to maintain conceptual integrity. Large decisions and
+architecture changes should be reviewed by the chief maintainer.
+
+Also the chief maintainer has the veto power on any change submitted
+to any branch. Naturally, a change in the criu-dev branch can be reverted
+after a chief maintainer veto, whereas a change in the master branch must be
+carefully reviewed by the chief maintainer and vetoed in advance.
+
+### How are maintainers added (and removed)?
+
+The best maintainers have a vested interest in the project. Maintainers
+are first and foremost contributors that have shown they are committed to
+the long term success of the project. Contributors wanting to become
+maintainers are expected to be deeply involved in contributing code,
+patches review, and paying needed attention to the issues in the project.
+Just contributing does not make you a maintainer, it is about building trust
+with the current maintainers of the project and being a person that they can
+rely on and trust to make decisions in the best interest of the project.
+
+When a contributor wants to become a maintainer or nominate someone as a
+maintainer, one can submit a "nomination", which technically is the
+respective modification to the `MAINTAINERS` file. When a maintainer feels
+they are unable to perform the required duties, or someone else wants to draw
+the community attention to this fact, one can submit a "(self-)removing"
+change.
+
+The final vote to add or to remove a maintainer is to be approved by the
+majority of current maintainers (with the chief maintainer having veto power
+on that too).
+
+One might have noticed that the chief maintainer (re-)assignment is not
+regulated by this document. That's true :) However, this can be done. If
+the community decides that the chief maintainer needs to be changed the
+respective "decision making rules" are to be prepared, submitted and
+accepted into this file first.
+
+Good luck!
diff --git a/Makefile b/Makefile
index 0140330e1..e26807158 100644
--- a/Makefile
+++ b/Makefile
@@ -17,34 +17,41 @@ ifeq ($(origin HOSTCFLAGS), undefined)
HOSTCFLAGS := $(CFLAGS) $(USERCFLAGS)
endif
-UNAME-M := $(shell uname -m)
-
#
# Supported Architectures
-ifneq ($(filter-out x86 arm aarch64 ppc64 s390,$(ARCH)),)
+ifneq ($(filter-out x86 arm aarch64 ppc64 s390 mips loongarch64 riscv64,$(ARCH)),)
$(error "The architecture $(ARCH) isn't supported")
endif
# The PowerPC 64 bits architecture could be big or little endian.
# They are handled in the same way.
-ifeq ($(UNAME-M),ppc64)
+ifeq ($(SUBARCH),ppc64)
error := $(error ppc64 big endian is not yet supported)
endif
#
# Architecture specific options.
ifeq ($(ARCH),arm)
- ARMV := $(shell echo $(UNAME-M) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7')
- DEFINES := -DCONFIG_ARMV$(ARMV) -DCONFIG_VDSO_32
+ ARMV := $(shell echo $(SUBARCH) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7')
ifeq ($(ARMV),6)
- USERCFLAGS += -march=armv6
+ ARCHCFLAGS += -march=armv6
endif
ifeq ($(ARMV),7)
- USERCFLAGS += -march=armv7-a
+ ARCHCFLAGS += -march=armv7-a+fp
endif
+ ifeq ($(ARMV),8)
+ # Running 'setarch linux32 uname -m' returns armv8l on aarch64.
+ # This tells CRIU to handle armv8l just as armv7hf. Right now this is
+ # only used for compile testing. No further verification of armv8l exists.
+ ARCHCFLAGS += -march=armv7-a
+ ARMV := 7
+ endif
+
+ DEFINES := -DCONFIG_ARMV$(ARMV) -DCONFIG_VDSO_32
+
PROTOUFIX := y
# For simplicity - compile code in Arm mode without interwork.
# We could choose Thumb mode as default instead - but a dirty
@@ -57,6 +64,8 @@ endif
ifeq ($(ARCH),aarch64)
DEFINES := -DCONFIG_AARCH64
+ CC_MBRANCH_PROT := $(shell $(CC) -c -x c /dev/null -mbranch-protection=none -o /dev/null >/dev/null 2>&1 && echo "-mbranch-protection=none")
+ CFLAGS_PIE := $(CC_MBRANCH_PROT)
endif
ifeq ($(ARCH),ppc64)
@@ -69,6 +78,18 @@ ifeq ($(ARCH),x86)
DEFINES := -DCONFIG_X86_64
endif
+ifeq ($(ARCH),mips)
+ DEFINES := -DCONFIG_MIPS
+endif
+
+ifeq ($(ARCH),loongarch64)
+ DEFINES := -DCONFIG_LOONGARCH64
+endif
+
+ifeq ($(ARCH),riscv64)
+ DEFINES := -DCONFIG_RISCV64
+endif
+
#
# CFLAGS_PIE:
#
@@ -77,7 +98,6 @@ endif
# commit "S/390: Fix 64 bit sibcall".
ifeq ($(ARCH),s390)
ARCH := s390
- SRCARCH := s390
DEFINES := -DCONFIG_S390
CFLAGS_PIE := -fno-optimize-sibling-calls
endif
@@ -85,25 +105,47 @@ endif
CFLAGS_PIE += -DCR_NOGLIBC
export CFLAGS_PIE
-LDARCH ?= $(SRCARCH)
+LDARCH ?= $(ARCH)
export LDARCH
export PROTOUFIX DEFINES
#
# Independent options for all tools.
DEFINES += -D_FILE_OFFSET_BITS=64
+DEFINES += -D_LARGEFILE64_SOURCE
DEFINES += -D_GNU_SOURCE
-WARNINGS := -Wall -Wformat-security
+WARNINGS := -Wall -Wformat-security -Wdeclaration-after-statement -Wstrict-prototypes
+
+# -Wdangling-pointer results in false warning when we add a list element to
+# local list head variable. It is false positive because before leaving the
+# function we always check that local list head variable is empty, thus
+# ensuring that pointer to it is not dangling anywhere, but gcc can't
+# understand it.
+# Note: There is similar problem with kernel list, where this warning is also
+# disabled: https://github.com/torvalds/linux/commit/49beadbd47c2
+WARNINGS += -Wno-dangling-pointer -Wno-unknown-warning-option
CFLAGS-GCOV := --coverage -fno-exceptions -fno-inline -fprofile-update=atomic
export CFLAGS-GCOV
+ifeq ($(ARCH),mips)
+WARNINGS := -rdynamic
+endif
+
+ifeq ($(ARCH),loongarch64)
+WARNINGS += -Wno-implicit-function-declaration
+endif
+
ifneq ($(GCOV),)
LDFLAGS += -lgcov
CFLAGS += $(CFLAGS-GCOV)
endif
+ifneq ($(NETWORK_LOCK_DEFAULT),)
+ CFLAGS += -DNETWORK_LOCK_DEFAULT=$(NETWORK_LOCK_DEFAULT)
+endif
+
ifeq ($(ASAN),1)
CFLAGS-ASAN := -fsanitize=address
export CFLAGS-ASAN
@@ -128,12 +170,12 @@ export GMON GMONLDOPT
endif
AFLAGS += -D__ASSEMBLY__
-CFLAGS += $(USERCFLAGS) $(WARNINGS) $(DEFINES) -iquote include/
+CFLAGS += $(USERCFLAGS) $(ARCHCFLAGS) $(WARNINGS) $(DEFINES) -iquote include/
HOSTCFLAGS += $(WARNINGS) $(DEFINES) -iquote include/
export AFLAGS CFLAGS USERCLFAGS HOSTCFLAGS
# Default target
-all: criu lib crit
+all: criu lib crit cuda_plugin
.PHONY: all
#
@@ -188,12 +230,13 @@ criu-deps += include/common/asm
#
# Configure variables.
export CONFIG_HEADER := include/common/config.h
-ifeq ($(filter tags etags cscope clean mrproper,$(MAKECMDGOALS)),)
+ifeq ($(filter tags etags cscope clean lint indent fetch-clang-format help mrproper,$(MAKECMDGOALS)),)
include Makefile.config
else
# To clean all files, enable make/build options here
export CONFIG_COMPAT := y
export CONFIG_GNUTLS := y
+export CONFIG_HAS_LIBBPF := y
endif
#
@@ -235,22 +278,19 @@ criu: $(criu-deps)
$(Q) $(MAKE) $(build)=criu all
.PHONY: criu
-crit/Makefile: ;
-crit/%: criu .FORCE
- $(Q) $(MAKE) $(build)=crit $@
-crit: criu
- $(Q) $(MAKE) $(build)=crit all
-.PHONY: crit
+unittest: $(criu-deps)
+ $(Q) $(MAKE) $(build)=criu unittest
+.PHONY: unittest
#
-# Libraries next once crit it ready
+# Libraries next once criu is ready
# (we might generate headers and such
# when building criu itself).
lib/Makefile: ;
-lib/%: crit .FORCE
+lib/%: criu .FORCE
$(Q) $(MAKE) $(build)=lib $@
-lib: crit
+lib: criu
$(Q) $(MAKE) $(build)=lib all
.PHONY: lib
@@ -259,21 +299,28 @@ clean mrproper:
$(Q) $(MAKE) $(build)=criu $@
$(Q) $(MAKE) $(build)=soccr $@
$(Q) $(MAKE) $(build)=lib $@
+ $(Q) $(MAKE) $(build)=crit $@
$(Q) $(MAKE) $(build)=compel $@
$(Q) $(MAKE) $(build)=compel/plugins $@
- $(Q) $(MAKE) $(build)=lib $@
- $(Q) $(MAKE) $(build)=crit $@
.PHONY: clean mrproper
+clean-amdgpu_plugin:
+ $(Q) $(MAKE) -C plugins/amdgpu clean
+.PHONY: clean-amdgpu_plugin
+
+clean-cuda_plugin:
+ $(Q) $(MAKE) -C plugins/cuda clean
+.PHONY: clean-cuda_plugin
+
clean-top:
$(Q) $(MAKE) -C Documentation clean
$(Q) $(MAKE) $(build)=test/compel clean
$(Q) $(RM) .gitid
.PHONY: clean-top
-clean: clean-top
+clean: clean-top clean-amdgpu_plugin clean-cuda_plugin
-mrproper-top: clean-top
+mrproper-top: clean-top clean-amdgpu_plugin clean-cuda_plugin
$(Q) $(RM) $(CONFIG_HEADER)
$(Q) $(RM) $(VERSION_HEADER)
$(Q) $(RM) $(COMPEL_VERSION_HEADER)
@@ -301,6 +348,18 @@ test: zdtm
$(Q) $(MAKE) -C test
.PHONY: test
+amdgpu_plugin: criu
+ $(Q) $(MAKE) -C plugins/amdgpu all
+.PHONY: amdgpu_plugin
+
+cuda_plugin: criu
+ $(Q) $(MAKE) -C plugins/cuda all
+.PHONY: cuda_plugin
+
+crit: lib
+ $(Q) $(MAKE) -C crit
+.PHONY: crit
+
#
# Generating tar requires tag matched CRIU_VERSION.
# If not found then simply use GIT's describe with
@@ -354,17 +413,19 @@ gcov:
.PHONY: gcov
docker-build:
- $(MAKE) -C scripts/build/ x86_64
+ $(MAKE) -C scripts/build/ x86_64
.PHONY: docker-build
docker-test:
- docker run --rm -it --privileged criu-x86_64 ./test/zdtm.py run -a -x tcp6 -x tcpbuf6 -x static/rtc -x cgroup
+ docker run --rm --privileged -v /lib/modules:/lib/modules --network=host --cgroupns=host criu-x86_64 \
+ ./test/zdtm.py run -a --keep-going --ignore-taint
.PHONY: docker-test
help:
@echo ' Targets:'
@echo ' all - Build all [*] targets'
@echo ' * criu - Build criu'
+ @echo ' * crit - Build crit'
@echo ' zdtm - Build zdtm test-suite'
@echo ' docs - Build documentation'
@echo ' install - Install CRIU (see INSTALL.md)'
@@ -377,14 +438,76 @@ help:
@echo ' cscope - Generate cscope database'
@echo ' test - Run zdtm test-suite'
@echo ' gcov - Make code coverage report'
+ @echo ' unittest - Run unit tests'
+ @echo ' lint - Run code linters'
+ @echo ' indent - Indent C code'
+ @echo ' amdgpu_plugin - Make AMD GPU plugin'
+ @echo ' cuda_plugin - Make NVIDIA CUDA plugin'
.PHONY: help
-lint:
- flake8 --version
- flake8 --config=scripts/flake8.cfg test/zdtm.py
- flake8 --config=scripts/flake8.cfg test/inhfd/*.py
- flake8 --config=scripts/flake8.cfg test/others/rpc/config_file.py
- flake8 --config=scripts/flake8.cfg lib/py/images/pb2dict.py
+ruff:
+ @ruff --version
+ ruff check ${RUFF_FLAGS} --config=scripts/ruff.toml \
+ test/zdtm.py \
+ test/inhfd/*.py \
+ test/others/rpc/config_file.py \
+ test/others/action-script/check_actions.py \
+ test/others/pycriu/*.py \
+ lib/pycriu/criu.py \
+ lib/pycriu/__init__.py \
+ lib/pycriu/images/pb2dict.py \
+ lib/pycriu/images/images.py \
+ scripts/criu-ns \
+ test/others/criu-ns/run.py \
+ crit/*.py \
+ crit/crit/*.py \
+ scripts/uninstall_module.py \
+ coredump/ coredump/coredump \
+ scripts/github-indent-warnings.py
+
+shellcheck:
+ shellcheck --version
+ shellcheck scripts/*.sh
+ shellcheck scripts/ci/*.sh
+ shellcheck contrib/apt-install contrib/dependencies/*.sh
+ shellcheck -x test/others/crit/*.sh
+ shellcheck -x test/others/libcriu/*.sh
+ shellcheck -x test/others/crit/*.sh test/others/criu-coredump/*.sh
+ shellcheck -x test/others/config-file/*.sh
+ shellcheck -x test/others/action-script/*.sh
+
+codespell:
+ codespell
+
+lint: ruff shellcheck codespell
+	# Do not append \n to pr_perror, pr_pwarn or fail
+	! git --no-pager grep -E '^\s*\<(pr_perror|pr_pwarn|fail)\>.*\\n"'
+	# Do not use %m with pr_* or fail
+	! git --no-pager grep -E '^\s*\<(pr_(err|perror|warn|pwarn|debug|info|msg)|fail)\>.*%m'
+	# Do not use errno with pr_perror, pr_pwarn or fail
+	! git --no-pager grep -E '^\s*\<(pr_perror|pr_pwarn|fail)\>\(".*".*errno'
+	# End pr_(err|warn|msg|info|debug) with \n
+	! git --no-pager grep -En '^\s*\<pr_(err|warn|msg|info|debug)\>.*);$$' | grep -v '\\n'
+	# No EOL whitespace for C files
+	! git --no-pager grep -E '\s+$$' \*.c \*.h
+.PHONY: lint ruff shellcheck codespell
+
+codecov: SHELL := $(shell command -v bash)
+codecov:
+ curl -Os https://uploader.codecov.io/latest/linux/codecov
+ chmod +x codecov
+ ./codecov
+.PHONY: codecov
+
+fetch-clang-format: .FORCE
+ $(E) ".clang-format"
+ $(Q) scripts/fetch-clang-format.sh
+
+BASE ?= "HEAD~1"
+OPTS ?= "--quiet"
+indent:
+ git clang-format --style file --extensions c,h $(OPTS) $(BASE)
+.PHONY: indent
include Makefile.install
diff --git a/Makefile.compel b/Makefile.compel
index 764afadc8..a4209edc5 100644
--- a/Makefile.compel
+++ b/Makefile.compel
@@ -50,8 +50,8 @@ compel/plugins/%: $(compel-deps) .FORCE
#
# GNU make 4.x supports targets matching via wide
-# match targeting, where GNU make 3.x series (used on
-# Travis) is not, so we have to write them here explicitly.
+# match targeting, where GNU make 3.x series is not,
+# so we have to write them here explicitly.
compel/plugins/std.lib.a: $(compel-deps) .FORCE
$(Q) $(MAKE) $(build)=compel/plugins $@
diff --git a/Makefile.config b/Makefile.config
index 1e4352b9d..5cf4b8216 100644
--- a/Makefile.config
+++ b/Makefile.config
@@ -2,12 +2,15 @@ include $(__nmk_dir)utils.mk
include $(__nmk_dir)msg.mk
include scripts/feature-tests.mak
+# This is a kludge for $(info ...) to not eat spaces.
+S :=
+
ifeq ($(call try-cc,$(FEATURE_TEST_LIBBSD_DEV),-lbsd),true)
LIBS_FEATURES += -lbsd
FEATURE_DEFINES += -DCONFIG_HAS_LIBBSD
else
- $(info Note: Building without setproctitle() and strlcpy() support.)
- $(info $(info) To enable these features, please install libbsd-devel (RPM) / libbsd-dev (DEB).)
+ $(info Note: Building without setproctitle() support.)
+ $(info $S Install libbsd-devel (RPM) / libbsd-dev (DEB) to fix.)
endif
ifeq ($(call pkg-config-check,libselinux),y)
@@ -15,45 +18,82 @@ ifeq ($(call pkg-config-check,libselinux),y)
FEATURE_DEFINES += -DCONFIG_HAS_SELINUX
endif
+ifeq ($(call pkg-config-check,libbpf),y)
+ LIBS_FEATURES += -lbpf
+ FEATURE_DEFINES += -DCONFIG_HAS_LIBBPF
+ export CONFIG_HAS_LIBBPF := y
+endif
+
+ifeq ($(call pkg-config-check,libdrm),y)
+ export CONFIG_AMDGPU := y
+ $(info Note: Building with amdgpu_plugin.)
+else
+ $(info Note: Building without amdgpu_plugin.)
+ $(info $S Install libdrm-devel (RPM) or libdrm-dev (DEB) to fix.)
+endif
+
ifeq ($(NO_GNUTLS)x$(call pkg-config-check,gnutls),xy)
LIBS_FEATURES += -lgnutls
export CONFIG_GNUTLS := y
FEATURE_DEFINES += -DCONFIG_GNUTLS
else
- $(info Note: Building without GnuTLS support)
+ $(info Note: Building without GnuTLS support.)
+ $(info $S Install gnutls-devel (RPM) or gnutls-dev (DEB) to fix.)
+endif
+
+ifeq ($(call pkg-config-check,libnftables),y)
+ LIB_NFTABLES := $(shell $(PKG_CONFIG) --libs libnftables)
+ ifeq ($(call try-cc,$(FEATURE_TEST_NFTABLES_LIB_API_0),$(LIB_NFTABLES)),true)
+ LIBS_FEATURES += $(LIB_NFTABLES)
+ FEATURE_DEFINES += -DCONFIG_HAS_NFTABLES_LIB_API_0
+ else ifeq ($(call try-cc,$(FEATURE_TEST_NFTABLES_LIB_API_1),$(LIB_NFTABLES)),true)
+ LIBS_FEATURES += $(LIB_NFTABLES)
+ FEATURE_DEFINES += -DCONFIG_HAS_NFTABLES_LIB_API_1
+ else
+ $(info Warn: Building without nftables support (incompatible API version).)
+ endif
+else
+ $(info Warn: Building without nftables support.)
+ $(info $S Install nftables-devel (RPM) or libnftables-dev (DEB) to fix.)
endif
export LIBS += $(LIBS_FEATURES)
+ifneq ($(PLUGINDIR),)
+ FEATURE_DEFINES += -DCR_PLUGIN_DEFAULT="\"$(PLUGINDIR)\""
+endif
+
CONFIG_FILE = .config
$(CONFIG_FILE):
touch $(CONFIG_FILE)
-ifeq ($(SRCARCH),x86)
+ifeq ($(ARCH),x86)
# CONFIG_COMPAT is only for x86 now, no need for compile-test other archs
ifeq ($(call try-asm,$(FEATURE_TEST_X86_COMPAT)),true)
export CONFIG_COMPAT := y
FEATURE_DEFINES += -DCONFIG_COMPAT
else
- $(info Note: Building without ia32 C/R, missed ia32 support in gcc)
- $(info $(info) That may be related to missing gcc-multilib in your)
- $(info $(info) distribution or you may have Debian with buggy toolchain)
- $(info $(info) (issue https://github.com/checkpoint-restore/criu/issues/315))
+ $(info Note: Building without ia32 C/R, missing ia32 support in gcc.)
+ $(info $S It may be related to missing gcc-multilib in your)
+ $(info $S distribution, or you may have Debian with buggy toolchain.)
+ $(info $S See https://github.com/checkpoint-restore/criu/issues/315.)
endif
endif
export DEFINES += $(FEATURE_DEFINES)
export CFLAGS += $(FEATURE_DEFINES)
-FEATURES_LIST := TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \
- SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW
+FEATURES_LIST := TCP_REPAIR PTRACE_PEEKSIGINFO \
+ SETPROCTITLE_INIT TCP_REPAIR_WINDOW MEMFD_CREATE \
+ OPENAT2 NO_LIBC_RSEQ_DEFS
# $1 - config name
define gen-feature-test
ifeq ($$(call try-cc,$$(FEATURE_TEST_$(1)),$$(LIBS_FEATURES),$$(DEFINES)),true)
$(Q) echo '#define CONFIG_HAS_$(1)' >> $$@
- $(Q) echo '' >> $$@
+else
+ $(Q) echo '// CONFIG_HAS_$(1) is not set' >> $$@
endif
endef
diff --git a/Makefile.install b/Makefile.install
index 3987bcc6f..70c607ec6 100644
--- a/Makefile.install
+++ b/Makefile.install
@@ -7,6 +7,7 @@ MANDIR ?= $(PREFIX)/share/man
INCLUDEDIR ?= $(PREFIX)/include
LIBEXECDIR ?= $(PREFIX)/libexec
RUNDIR ?= /run
+PLUGINDIR ?= $(PREFIX)/lib/criu
#
# For recent Debian/Ubuntu with multiarch support.
@@ -26,7 +27,34 @@ endif
LIBDIR ?= $(PREFIX)/lib
export PREFIX BINDIR SBINDIR MANDIR RUNDIR
-export LIBDIR INCLUDEDIR LIBEXECDIR
+export LIBDIR INCLUDEDIR LIBEXECDIR PLUGINDIR
+
+# Detect externally managed Python environment (PEP 668).
+PYTHON_EXTERNALLY_MANAGED := $(shell $(PYTHON) -c 'import os, sysconfig; print(int(os.path.isfile(os.path.join(sysconfig.get_path("stdlib"), "EXTERNALLY-MANAGED"))))')
+PIP_BREAK_SYSTEM_PACKAGES ?= 0
+
+# If Python environment is externally managed and PIP_BREAK_SYSTEM_PACKAGES is not set, skip pip install.
+SKIP_PIP_INSTALL := 0
+ifeq ($(PYTHON_EXTERNALLY_MANAGED),1)
+ifeq ($(PIP_BREAK_SYSTEM_PACKAGES),0)
+
+SKIP_PIP_INSTALL := 1
+$(info Warn: Externally managed python environment)
+$(info Consider using PIP_BREAK_SYSTEM_PACKAGES=1)
+
+endif
+endif
+
+# Default flags for pip install:
+# --ignore-installed: Overwrite already installed pycriu/crit packages
+# --no-build-isolation: Use current Python environment to build pycriu/crit packages
+# --no-deps: Don't install any dependencies
+# --no-index: Don't use PyPI index to find packages
+# --progress-bar: Cleaner output
+# --upgrade: Treat the install as an upgrade when replacing the installed version
+PIPFLAGS ?= --ignore-installed --no-build-isolation --no-deps --no-index --progress-bar off --upgrade
+
+export SKIP_PIP_INSTALL PIPFLAGS
install-man:
$(Q) $(MAKE) -C Documentation install
@@ -36,22 +64,37 @@ install-lib: lib
$(Q) $(MAKE) $(build)=lib install
.PHONY: install-lib
+install-crit: lib
+ $(Q) $(MAKE) $(build)=crit install
+.PHONY: install-crit
+
install-criu: criu
$(Q) $(MAKE) $(build)=criu install
.PHONY: install-criu
+install-amdgpu_plugin: amdgpu_plugin
+ $(Q) $(MAKE) -C plugins/amdgpu install
+.PHONY: install-amdgpu_plugin
+
+install-cuda_plugin: cuda_plugin
+ $(Q) $(MAKE) -C plugins/cuda install
+.PHONY: install-cuda_plugin
+
install-compel: $(compel-install-targets)
$(Q) $(MAKE) $(build)=compel install
$(Q) $(MAKE) $(build)=compel/plugins install
.PHONY: install-compel
-install: install-man install-lib install-criu install-compel ;
+install: install-man install-lib install-crit install-criu install-compel install-amdgpu_plugin install-cuda_plugin ;
.PHONY: install
uninstall:
$(Q) $(MAKE) -C Documentation $@
$(Q) $(MAKE) $(build)=lib $@
+ $(Q) $(MAKE) $(build)=crit $@
$(Q) $(MAKE) $(build)=criu $@
$(Q) $(MAKE) $(build)=compel $@
$(Q) $(MAKE) $(build)=compel/plugins $@
+ $(Q) $(MAKE) -C plugins/amdgpu $@
+ $(Q) $(MAKE) -C plugins/cuda $@
.PHONY: uninstall
diff --git a/Makefile.versions b/Makefile.versions
index f3adcb0a6..3e6c9ed22 100644
--- a/Makefile.versions
+++ b/Makefile.versions
@@ -1,10 +1,10 @@
#
# CRIU version.
-CRIU_VERSION_MAJOR := 3
-CRIU_VERSION_MINOR := 13
+CRIU_VERSION_MAJOR := 4
+CRIU_VERSION_MINOR := 2
CRIU_VERSION_SUBLEVEL :=
CRIU_VERSION_EXTRA :=
-CRIU_VERSION_NAME := Silicon Willet
+CRIU_VERSION_NAME := CRIUTIBILITY
CRIU_VERSION := $(CRIU_VERSION_MAJOR)$(if $(CRIU_VERSION_MINOR),.$(CRIU_VERSION_MINOR))$(if $(CRIU_VERSION_SUBLEVEL),.$(CRIU_VERSION_SUBLEVEL))$(if $(CRIU_VERSION_EXTRA),.$(CRIU_VERSION_EXTRA))
export CRIU_VERSION_MAJOR CRIU_VERSION_MINOR CRIU_VERSION_SUBLEVEL
diff --git a/README.md b/README.md
index 16e8452b5..6e2a0de9e 100644
--- a/README.md
+++ b/README.md
@@ -1,22 +1,33 @@
-[](https://travis-ci.org/checkpoint-restore/criu)
-[](https://travis-ci.org/checkpoint-restore/criu)
-[](https://www.codacy.com/app/xemul/criu?utm_source=github.com&utm_medium=referral&utm_content=xemul/criu&utm_campaign=Badge_Grade)
-
+[](
+ https://github.com/checkpoint-restore/criu/actions/workflows/x86-64-gcc-test.yml)
+[](
+ https://github.com/checkpoint-restore/criu/actions/workflows/docker-test.yml)
+[](
+ https://github.com/checkpoint-restore/criu/actions/workflows/podman-test.yml)
+[](
+ https://circleci.com/gh/checkpoint-restore/criu)
+
+
## CRIU -- A project to implement checkpoint/restore functionality for Linux
CRIU (stands for Checkpoint and Restore in Userspace) is a utility to checkpoint/restore Linux tasks.
-Using this tool, you can freeze a running application (or part of it) and checkpoint
+Using this tool, you can freeze a running application (or part of it) and checkpoint
it to a hard drive as a collection of files. You can then use the files to restore and run the
application from the point it was frozen at. The distinctive feature of the CRIU
project is that it is mainly implemented in user space. There are some more projects
-doing C/R for Linux, and so far CRIU [appears to be](https://criu.org/Comparison_to_other_CR_projects)
+doing C/R for Linux, and so far CRIU [appears to be](https://criu.org/Comparison_to_other_CR_projects)
the most feature-rich and up-to-date with the kernel.
+CRIU project is (almost) the never-ending story, because we have to always keep up with the
+Linux kernel supporting checkpoint and restore for all the features it provides. Thus we're
+looking for contributors of all kinds -- feedback, bug reports, testing, coding, writing, etc.
+Please refer to [CONTRIBUTING.md](CONTRIBUTING.md) if you would like to get involved.
+
The project [started](https://criu.org/History) as the way to do live migration for OpenVZ
-Linux containers, but later grew to more sophisticated and flexible tool. It is currently
-used by (integrated into) OpenVZ, LXC/LXD, Docker, and other software, project gets tremendous
+Linux containers, but later grew to more sophisticated and flexible tool. It is currently
+used by (integrated into) OpenVZ, LXC/LXD, Docker, and other software, project gets tremendous
help from the community, and its packages are included into many Linux distributions.
The project home is at http://criu.org. This wiki contains all the knowledge base for CRIU we have.
@@ -24,15 +35,15 @@ Pages worth starting with are:
- [Installation instructions](http://criu.org/Installation)
- [A simple example of usage](http://criu.org/Simple_loop)
- [Examples of more advanced usage](https://criu.org/Category:HOWTO)
-- Troubleshooting can be hard, some help can be found [here](https://criu.org/When_C/R_fails), [here](https://criu.org/What_cannot_be_checkpointed) and [here](https://criu.org/FAQ)
+- Troubleshooting can be hard, some help can be found [here](https://criu.org/When_C/R_fails), [here](https://criu.org/What_cannot_be_checkpointed) and [here](https://criu.org/index.php?title=FAQ)
-### Checkpoint and restore of simple loop process
-[
](https://asciinema.org/a/232445)
+### Checkpoint and restore of simple loop process
+
## Advanced features
As main usage for CRIU is live migration, there's a library for it called P.Haul. Also the
-project exposes two cool core features as standalone libraries. These are libcompel for parasite code
+project exposes two cool core features as standalone libraries. These are libcompel for parasite code
injection and libsoccr for TCP connections checkpoint-restore.
### Live migration
@@ -56,21 +67,9 @@ One of the CRIU features is the ability to save and restore state of a TCP socke
without breaking the connection. This functionality is considered to be useful by
itself, and we have it available as the [libsoccr library](https://criu.org/Libsoccr).
-## How to contribute
-
-CRIU project is (almost) the never-ending story, because we have to always keep up with the
-Linux kernel supporting checkpoint and restore for all the features it provides. Thus we're
-looking for contributors of all kinds -- feedback, bug reports, testing, coding, writing, etc.
-Here are some useful hints to get involved.
-
-* We have both -- [very simple](https://checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks;
-* CRIU does need [extensive testing](https://checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting);
-* Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles;
-* Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu);
-* For historical reasons we do not accept PRs, instead [patches are welcome](http://criu.org/How_to_submit_patches);
-* Spread the word about CRIU in [social networks](http://criu.org/Contacts);
-* If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events);
-
## Licence
The project is licensed under GPLv2 (though files sitting in the lib/ directory are LGPLv2.1).
+
+All files in the images/ directory are licensed under the Expat license (so-called MIT).
+See the images/LICENSE file.
diff --git a/compel/.gitignore b/compel/.gitignore
index eab3337d6..5e770a86c 100644
--- a/compel/.gitignore
+++ b/compel/.gitignore
@@ -4,6 +4,9 @@ arch/arm/plugins/std/syscalls/syscalls.S
arch/aarch64/plugins/std/syscalls/syscalls.S
arch/s390/plugins/std/syscalls/syscalls.S
arch/ppc64/plugins/std/syscalls/syscalls.S
+arch/mips/plugins/std/syscalls/syscalls-64.S
+arch/loongarch64/plugins/std/syscalls/syscalls-64.S
+arch/riscv64/plugins/std/syscalls/syscalls.S
include/version.h
plugins/include/uapi/std/asm/syscall-types.h
plugins/include/uapi/std/syscall-64.h
diff --git a/compel/Makefile b/compel/Makefile
index de9318c42..c0b8a82a0 100644
--- a/compel/Makefile
+++ b/compel/Makefile
@@ -28,8 +28,12 @@ lib-y += src/lib/infect-util.o
lib-y += src/lib/infect.o
lib-y += src/lib/ptrace.o
-# handle_elf() has no support of ELF relocations on ARM (yet?)
-ifneq ($(filter arm aarch64,$(ARCH)),)
+ifeq ($(ARCH),x86)
+lib-y += arch/$(ARCH)/src/lib/thread_area.o
+endif
+
+# handle_elf() has no support of ELF relocations on ARM, LoongArch64 and RISCV64 (yet?)
+ifneq ($(filter arm aarch64 loongarch64 riscv64,$(ARCH)),)
CFLAGS += -DNO_RELOCS
HOSTCFLAGS += -DNO_RELOCS
endif
diff --git a/compel/arch/aarch64/plugins/include/asm/syscall-types.h b/compel/arch/aarch64/plugins/include/asm/syscall-types.h
index ee0e2185d..45fd57af6 100644
--- a/compel/arch/aarch64/plugins/include/asm/syscall-types.h
+++ b/compel/arch/aarch64/plugins/include/asm/syscall-types.h
@@ -1,7 +1,7 @@
#ifndef COMPEL_ARCH_SYSCALL_TYPES_H__
#define COMPEL_ARCH_SYSCALL_TYPES_H__
-#define SA_RESTORER 0x04000000
+#define SA_RESTORER 0x04000000
typedef void rt_signalfn_t(int, siginfo_t *, void *);
typedef rt_signalfn_t *rt_sighandler_t;
@@ -9,20 +9,20 @@ typedef rt_signalfn_t *rt_sighandler_t;
typedef void rt_restorefn_t(void);
typedef rt_restorefn_t *rt_sigrestore_t;
-#define _KNSIG 64
-#define _NSIG_BPW 64
+#define _KNSIG 64
+#define _NSIG_BPW 64
-#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW)
+#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW)
typedef struct {
unsigned long sig[_KNSIG_WORDS];
} k_rtsigset_t;
typedef struct {
- rt_sighandler_t rt_sa_handler;
- unsigned long rt_sa_flags;
- rt_sigrestore_t rt_sa_restorer;
- k_rtsigset_t rt_sa_mask;
+ rt_sighandler_t rt_sa_handler;
+ unsigned long rt_sa_flags;
+ rt_sigrestore_t rt_sa_restorer;
+ k_rtsigset_t rt_sa_mask;
} rt_sigaction_t;
#endif /* COMPEL_ARCH_SYSCALL_TYPES_H__ */
diff --git a/compel/arch/aarch64/plugins/std/parasite-head.S b/compel/arch/aarch64/plugins/std/parasite-head.S
index 5e7067f6b..456c2117d 100644
--- a/compel/arch/aarch64/plugins/std/parasite-head.S
+++ b/compel/arch/aarch64/plugins/std/parasite-head.S
@@ -2,19 +2,6 @@
.section .head.text, "ax"
ENTRY(__export_parasite_head_start)
- adr x2, __export_parasite_head_start // get the address of this instruction
-
- ldr x0, __export_parasite_cmd
-
- ldr x1, parasite_args_ptr
- add x1, x1, x2 // fixup __export_parasite_args
-
bl parasite_service
brk #0 // the instruction BRK #0 generates the signal SIGTRAP in Linux
-
-parasite_args_ptr:
- .quad __export_parasite_args
-
-__export_parasite_cmd:
- .quad 0
END(__export_parasite_head_start)
diff --git a/compel/arch/aarch64/plugins/std/syscalls/syscall-aux.h b/compel/arch/aarch64/plugins/std/syscalls/syscall-aux.h
index 6272bf3a8..3c7124856 100644
--- a/compel/arch/aarch64/plugins/std/syscalls/syscall-aux.h
+++ b/compel/arch/aarch64/plugins/std/syscalls/syscall-aux.h
@@ -1,3 +1,3 @@
#ifndef __NR_openat
-# define __NR_openat 56
+#define __NR_openat 56
#endif
diff --git a/compel/arch/aarch64/scripts/compel-pack.lds.S b/compel/arch/aarch64/scripts/compel-pack.lds.S
index eba89cd5f..57895ec9b 100644
--- a/compel/arch/aarch64/scripts/compel-pack.lds.S
+++ b/compel/arch/aarch64/scripts/compel-pack.lds.S
@@ -29,8 +29,4 @@ SECTIONS
*(.eh_frame*)
*(*)
}
-
-/* Parasite args should have 4 bytes align, as we have futex inside. */
-. = ALIGN(4);
-__export_parasite_args = .;
}
diff --git a/compel/arch/aarch64/src/lib/cpu.c b/compel/arch/aarch64/src/lib/cpu.c
index cfaab1e65..538a29887 100644
--- a/compel/arch/aarch64/src/lib/cpu.c
+++ b/compel/arch/aarch64/src/lib/cpu.c
@@ -7,7 +7,7 @@
#include "log.h"
-#undef LOG_PREFIX
+#undef LOG_PREFIX
#define LOG_PREFIX "cpu: "
static compel_cpuinfo_t rt_info;
@@ -22,11 +22,24 @@ static void fetch_rt_cpuinfo(void)
}
}
-void compel_set_cpu_cap(compel_cpuinfo_t *info, unsigned int feature) { }
-void compel_clear_cpu_cap(compel_cpuinfo_t *info, unsigned int feature) { }
-int compel_test_cpu_cap(compel_cpuinfo_t *info, unsigned int feature) { return 0; }
-int compel_test_fpu_cap(compel_cpuinfo_t *info, unsigned int feature) { return 0; }
-int compel_cpuid(compel_cpuinfo_t *info) { return 0; }
+void compel_set_cpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+}
+void compel_clear_cpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+}
+int compel_test_cpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+ return 0;
+}
+int compel_test_fpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+ return 0;
+}
+int compel_cpuid(compel_cpuinfo_t *info)
+{
+ return 0;
+}
bool compel_cpu_has_feature(unsigned int feature)
{
diff --git a/compel/arch/aarch64/src/lib/handle-elf.c b/compel/arch/aarch64/src/lib/handle-elf.c
index 1c3686c48..206aef4cd 100644
--- a/compel/arch/aarch64/src/lib/handle-elf.c
+++ b/compel/arch/aarch64/src/lib/handle-elf.c
@@ -1,20 +1,17 @@
#include
-
-#include "uapi/compel.h"
+#include
#include "handle-elf.h"
#include "piegen.h"
#include "log.h"
-static const unsigned char __maybe_unused
-elf_ident_64_le[EI_NIDENT] = {
- 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00,
+static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = {
+ 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
-static const unsigned char __maybe_unused
-elf_ident_64_be[EI_NIDENT] = {
- 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x02, 0x01, 0x00,
+static const unsigned char __maybe_unused elf_ident_64_be[EI_NIDENT] = {
+ 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x02, 0x01, 0x00, /* clang-format */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
diff --git a/compel/arch/aarch64/src/lib/include/handle-elf.h b/compel/arch/aarch64/src/lib/include/handle-elf.h
index 0f64b34cb..9f1a75081 100644
--- a/compel/arch/aarch64/src/lib/include/handle-elf.h
+++ b/compel/arch/aarch64/src/lib/include/handle-elf.h
@@ -3,8 +3,8 @@
#include "elf64-types.h"
-#define __handle_elf handle_elf_aarch64
-#define arch_is_machine_supported(e_machine) (e_machine == EM_AARCH64)
+#define __handle_elf handle_elf_aarch64
+#define arch_is_machine_supported(e_machine) (e_machine == EM_AARCH64)
extern int handle_elf_aarch64(void *mem, size_t size);
diff --git a/compel/arch/aarch64/src/lib/include/syscall.h b/compel/arch/aarch64/src/lib/include/syscall.h
index e2ec1272e..13ee906e1 100644
--- a/compel/arch/aarch64/src/lib/include/syscall.h
+++ b/compel/arch/aarch64/src/lib/include/syscall.h
@@ -1,4 +1,8 @@
#ifndef __COMPEL_SYSCALL_H__
#define __COMPEL_SYSCALL_H__
-#define __NR(syscall, compat) __NR_##syscall
+#define __NR(syscall, compat) \
+ ({ \
+ (void)compat; \
+ __NR_##syscall; \
+ })
#endif
diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/breakpoints.h b/compel/arch/aarch64/src/lib/include/uapi/asm/breakpoints.h
index 5f090490d..8a61b268f 100644
--- a/compel/arch/aarch64/src/lib/include/uapi/asm/breakpoints.h
+++ b/compel/arch/aarch64/src/lib/include/uapi/asm/breakpoints.h
@@ -2,14 +2,41 @@
#define __COMPEL_BREAKPOINTS_H__
#define ARCH_SI_TRAP TRAP_BRKPT
-static inline int ptrace_set_breakpoint(pid_t pid, void *addr)
-{
- return 0;
-}
+#include
+#include
-static inline int ptrace_flush_breakpoints(pid_t pid)
-{
- return 0;
-}
+struct hwbp_cap {
+ char arch;
+ char bp_count;
+};
+
+/* copied from `linux/arch/arm64/include/asm/hw_breakpoint.h` */
+/* Lengths */
+#define ARM_BREAKPOINT_LEN_1 0x1
+#define ARM_BREAKPOINT_LEN_2 0x3
+#define ARM_BREAKPOINT_LEN_3 0x7
+#define ARM_BREAKPOINT_LEN_4 0xf
+#define ARM_BREAKPOINT_LEN_5 0x1f
+#define ARM_BREAKPOINT_LEN_6 0x3f
+#define ARM_BREAKPOINT_LEN_7 0x7f
+#define ARM_BREAKPOINT_LEN_8 0xff
+
+/* Privilege Levels */
+#define AARCH64_BREAKPOINT_EL1 1
+#define AARCH64_BREAKPOINT_EL0 2
+
+/* Breakpoint */
+#define ARM_BREAKPOINT_EXECUTE 0
+
+/* Watchpoints */
+#define ARM_BREAKPOINT_LOAD 1
+#define ARM_BREAKPOINT_STORE 2
+#define AARCH64_ESR_ACCESS_MASK (1 << 6)
+
+#define DISABLE_HBP 0
+#define ENABLE_HBP 1
+
+int ptrace_set_breakpoint(pid_t pid, void *addr);
+int ptrace_flush_breakpoints(pid_t pid);
#endif
diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/cpu.h b/compel/arch/aarch64/src/lib/include/uapi/asm/cpu.h
index c35460e15..12e749508 100644
--- a/compel/arch/aarch64/src/lib/include/uapi/asm/cpu.h
+++ b/compel/arch/aarch64/src/lib/include/uapi/asm/cpu.h
@@ -1,6 +1,7 @@
#ifndef UAPI_COMPEL_ASM_CPU_H__
#define UAPI_COMPEL_ASM_CPU_H__
-typedef struct { } compel_cpuinfo_t;
+typedef struct {
+} compel_cpuinfo_t;
#endif /* UAPI_COMPEL_ASM_CPU_H__ */
diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/gcs-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/gcs-types.h
new file mode 100644
index 000000000..9f9655e3b
--- /dev/null
+++ b/compel/arch/aarch64/src/lib/include/uapi/asm/gcs-types.h
@@ -0,0 +1,47 @@
+#ifndef __UAPI_ASM_GCS_TYPES_H__
+#define __UAPI_ASM_GCS_TYPES_H__
+
+#ifndef NT_ARM_GCS
+#define NT_ARM_GCS 0x410 /* ARM GCS state */
+#endif
+
+/* Shadow Stack/Guarded Control Stack interface */
+#define PR_GET_SHADOW_STACK_STATUS 74
+#define PR_SET_SHADOW_STACK_STATUS 75
+#define PR_LOCK_SHADOW_STACK_STATUS 76
+
+/* When set PR_SHADOW_STACK_ENABLE flag allocates a Guarded Control Stack */
+#ifndef PR_SHADOW_STACK_ENABLE
+#define PR_SHADOW_STACK_ENABLE (1UL << 0)
+#endif
+
+/* Allows explicit GCS stores (eg. using GCSSTR) */
+#ifndef PR_SHADOW_STACK_WRITE
+#define PR_SHADOW_STACK_WRITE (1UL << 1)
+#endif
+
+/* Allows explicit GCS pushes (eg. using GCSPUSHM) */
+#ifndef PR_SHADOW_STACK_PUSH
+#define PR_SHADOW_STACK_PUSH (1UL << 2)
+#endif
+
+#ifndef SHADOW_STACK_SET_TOKEN
+#define SHADOW_STACK_SET_TOKEN 0x1 /* Set up a restore token in the shadow stack */
+#endif
+
+#define PR_SHADOW_STACK_ALL_MODES \
+	(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH)
+
+/* copied from: arch/arm64/include/asm/sysreg.h */
+#define GCS_CAP_VALID_TOKEN 0x1
+#define GCS_CAP_ADDR_MASK 0xFFFFFFFFFFFFF000ULL
+#define GCS_CAP(x) ((((unsigned long)(x)) & GCS_CAP_ADDR_MASK) | GCS_CAP_VALID_TOKEN)
+#define GCS_SIGNAL_CAP(addr) (((unsigned long)(addr)) & GCS_CAP_ADDR_MASK)
+
+#include
+
+#ifndef HWCAP_GCS
+#define HWCAP_GCS (1UL << 32)
+#endif
+
+#endif /* __UAPI_ASM_GCS_TYPES_H__ */
\ No newline at end of file
diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h
index 4662f7689..606c92ffe 100644
--- a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h
+++ b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h
@@ -2,12 +2,13 @@
#define UAPI_COMPEL_ASM_TYPES_H__
#include
+#include
#include
#include
#include
-#define SIGMAX 64
-#define SIGMAX_OLD 31
+#define SIGMAX 64
+#define SIGMAX_OLD 31
/*
* Copied from the Linux kernel header arch/arm64/include/uapi/asm/ptrace.h
@@ -15,18 +16,53 @@
* A thread ARM CPU context
*/
-typedef struct user_pt_regs user_regs_struct_t;
-typedef struct user_fpsimd_state user_fpregs_struct_t;
+typedef struct user_pt_regs user_regs_struct_t;
-#define REG_RES(r) ((uint64_t)(r).regs[0])
-#define REG_IP(r) ((uint64_t)(r).pc)
-#define REG_SP(r) ((uint64_t)((r).sp))
-#define REG_SYSCALL_NR(r) ((uint64_t)(r).regs[8])
+/*
+ * GCS (Guarded Control Stack)
+ *
+ * This mirrors the kernel definition but renamed to cr_user_gcs
+ * to avoid conflict with kernel headers (/usr/include/asm/ptrace.h).
+ */
+struct cr_user_gcs {
+ __u64 features_enabled;
+ __u64 features_locked;
+ __u64 gcspr_el0;
+};
-#define user_regs_native(pregs) true
+struct user_fpregs_struct {
+ struct user_fpsimd_state fpstate;
+ struct cr_user_gcs gcs;
+};
+typedef struct user_fpregs_struct user_fpregs_struct_t;
-#define ARCH_SI_TRAP TRAP_BRKPT
+#define __compel_arch_fetch_thread_area(tid, th) 0
+#define compel_arch_fetch_thread_area(tctl) 0
+#define compel_arch_get_tls_task(ctl, tls)
+#define compel_arch_get_tls_thread(tctl, tls)
-#define __NR(syscall, compat) __NR_##syscall
+#define REG_RES(r) ((uint64_t)(r).regs[0])
+#define REG_IP(r) ((uint64_t)(r).pc)
+#define SET_REG_IP(r, val) ((r).pc = (val))
+#define REG_SP(r) ((uint64_t)((r).sp))
+#define REG_SYSCALL_NR(r) ((uint64_t)(r).regs[8])
+
+#define user_regs_native(pregs) true
+
+#define ARCH_SI_TRAP TRAP_BRKPT
+
+#define __NR(syscall, compat) \
+ ({ \
+ (void)compat; \
+ __NR_##syscall; \
+ })
+
+extern bool __compel_host_supports_gcs(void);
+#define compel_host_supports_gcs __compel_host_supports_gcs
+
+struct parasite_ctl;
+extern int __parasite_setup_shstk(struct parasite_ctl *ctl,
+ user_fpregs_struct_t *ext_regs);
+#define parasite_setup_shstk __parasite_setup_shstk
#endif /* UAPI_COMPEL_ASM_TYPES_H__ */
diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h
index bff714cca..7efee528f 100644
--- a/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h
+++ b/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h
@@ -1,37 +1,48 @@
#ifndef UAPI_COMPEL_ASM_SIGFRAME_H__
#define UAPI_COMPEL_ASM_SIGFRAME_H__
-#include
+#include
#include
#include
+#include
/* Copied from the kernel header arch/arm64/include/uapi/asm/sigcontext.h */
-#define FPSIMD_MAGIC 0x46508001
+#define FPSIMD_MAGIC 0x46508001
+#define GCS_MAGIC 0x47435300
-typedef struct fpsimd_context fpu_state_t;
+typedef struct fpsimd_context fpu_state_t;
-struct aux_context {
- struct fpsimd_context fpsimd;
- /* additional context to be added before "end" */
- struct _aarch64_ctx end;
+struct gcs_context {
+ struct _aarch64_ctx head;
+ __u64 gcspr;
+ __u64 features_enabled;
+ __u64 reserved;
};
-// XXX: the idetifier rt_sigcontext is expected to be struct by the CRIU code
-#define rt_sigcontext sigcontext
+struct aux_context {
+ struct fpsimd_context fpsimd;
+ struct gcs_context gcs;
+ /* additional context to be added before "end" */
+ struct _aarch64_ctx end;
+};
+
+// XXX: the identifier rt_sigcontext is expected to be struct by the CRIU code
+#define rt_sigcontext sigcontext
#include
/* Copied from the kernel source arch/arm64/kernel/signal.c */
struct rt_sigframe {
- siginfo_t info;
- ucontext_t uc;
- uint64_t fp;
- uint64_t lr;
+ siginfo_t info;
+ ucontext_t uc;
+ uint64_t fp;
+ uint64_t lr;
};
+/* clang-format off */
#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe) \
asm volatile( \
"mov sp, %0 \n" \
@@ -40,30 +51,30 @@ struct rt_sigframe {
: \
: "r"(new_sp) \
: "x8", "memory")
+/* clang-format on */
/* cr_sigcontext is copied from arch/arm64/include/uapi/asm/sigcontext.h */
struct cr_sigcontext {
- __u64 fault_address;
- /* AArch64 registers */
- __u64 regs[31];
- __u64 sp;
- __u64 pc;
- __u64 pstate;
- /* 4K reserved for FP/SIMD state and future expansion */
- __u8 __reserved[4096] __attribute__((__aligned__(16)));
+ __u64 fault_address;
+ /* AArch64 registers */
+ __u64 regs[31];
+ __u64 sp;
+ __u64 pc;
+ __u64 pstate;
+ /* 4K reserved for FP/SIMD state and future expansion */
+ __u8 __reserved[4096] __attribute__((__aligned__(16)));
};
-#define RT_SIGFRAME_UC(rt_sigframe) (&rt_sigframe->uc)
-#define RT_SIGFRAME_REGIP(rt_sigframe) ((long unsigned int)(rt_sigframe)->uc.uc_mcontext.pc)
-#define RT_SIGFRAME_HAS_FPU(rt_sigframe) (1)
-#define RT_SIGFRAME_SIGCONTEXT(rt_sigframe) ((struct cr_sigcontext *)&(rt_sigframe)->uc.uc_mcontext)
-#define RT_SIGFRAME_AUX_CONTEXT(rt_sigframe) ((struct aux_context*)&(RT_SIGFRAME_SIGCONTEXT(rt_sigframe)->__reserved))
-#define RT_SIGFRAME_FPU(rt_sigframe) (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->fpsimd)
-#define RT_SIGFRAME_OFFSET(rt_sigframe) 0
+#define RT_SIGFRAME_UC(rt_sigframe) (&rt_sigframe->uc)
+#define RT_SIGFRAME_REGIP(rt_sigframe) ((long unsigned int)(rt_sigframe)->uc.uc_mcontext.pc)
+#define RT_SIGFRAME_HAS_FPU(rt_sigframe) (1)
+#define RT_SIGFRAME_SIGCONTEXT(rt_sigframe) ((struct cr_sigcontext *)&(rt_sigframe)->uc.uc_mcontext)
+#define RT_SIGFRAME_AUX_CONTEXT(rt_sigframe) ((struct aux_context *)&(RT_SIGFRAME_SIGCONTEXT(rt_sigframe)->__reserved))
+#define RT_SIGFRAME_FPU(rt_sigframe) (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->fpsimd)
+#define RT_SIGFRAME_OFFSET(rt_sigframe) 0
+#define RT_SIGFRAME_GCS(rt_sigframe) (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->gcs)
-#define rt_sigframe_erase_sigset(sigframe) \
- memset(&sigframe->uc.uc_sigmask, 0, sizeof(k_rtsigset_t))
-#define rt_sigframe_copy_sigset(sigframe, from) \
- memcpy(&sigframe->uc.uc_sigmask, from, sizeof(k_rtsigset_t))
+#define rt_sigframe_erase_sigset(sigframe) memset(&sigframe->uc.uc_sigmask, 0, sizeof(k_rtsigset_t))
+#define rt_sigframe_copy_sigset(sigframe, from) memcpy(&sigframe->uc.uc_sigmask, from, sizeof(k_rtsigset_t))
#endif /* UAPI_COMPEL_ASM_SIGFRAME_H__ */
diff --git a/compel/arch/aarch64/src/lib/infect.c b/compel/arch/aarch64/src/lib/infect.c
index 4b5939022..42f593c79 100644
--- a/compel/arch/aarch64/src/lib/infect.c
+++ b/compel/arch/aarch64/src/lib/infect.c
@@ -2,7 +2,9 @@
#include
#include
#include
-#include
+#include
+#include
+
#include
#include "common/page.h"
#include "uapi/compel/asm/infect-types.h"
@@ -10,6 +12,9 @@
#include "errno.h"
#include "infect.h"
#include "infect-priv.h"
+#include "asm/breakpoints.h"
+#include "asm/gcs-types.h"
+#include
unsigned __page_size = 0;
unsigned __page_shift = 0;
@@ -18,12 +23,11 @@ unsigned __page_shift = 0;
* Injected syscall instruction
*/
const char code_syscall[] = {
- 0x01, 0x00, 0x00, 0xd4, /* SVC #0 */
- 0x00, 0x00, 0x20, 0xd4 /* BRK #0 */
+ 0x01, 0x00, 0x00, 0xd4, /* SVC #0 */
+ 0x00, 0x00, 0x20, 0xd4 /* BRK #0 */
};
-static const int
-code_syscall_aligned = round_up(sizeof(code_syscall), sizeof(long));
+static const int code_syscall_aligned = round_up(sizeof(code_syscall), sizeof(long));
static inline void __always_unused __check_code_syscall(void)
{
@@ -31,40 +35,66 @@ static inline void __always_unused __check_code_syscall(void)
BUILD_BUG_ON(!is_log2(sizeof(code_syscall)));
}
-int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe,
- user_regs_struct_t *regs,
- user_fpregs_struct_t *fpregs)
+bool __compel_host_supports_gcs(void)
+{
+ unsigned long hwcap = getauxval(AT_HWCAP);
+ return (hwcap & HWCAP_GCS) != 0;
+}
+
+static bool __compel_gcs_enabled(struct cr_user_gcs *gcs)
+{
+ if (!compel_host_supports_gcs())
+ return false;
+
+ return gcs && (gcs->features_enabled & PR_SHADOW_STACK_ENABLE) != 0;
+}
+
+int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
{
struct fpsimd_context *fpsimd = RT_SIGFRAME_FPU(sigframe);
+ struct gcs_context *gcs = RT_SIGFRAME_GCS(sigframe);
memcpy(sigframe->uc.uc_mcontext.regs, regs->regs, sizeof(regs->regs));
- sigframe->uc.uc_mcontext.sp = regs->sp;
- sigframe->uc.uc_mcontext.pc = regs->pc;
- sigframe->uc.uc_mcontext.pstate = regs->pstate;
+ pr_debug("sigreturn_prep_regs_plain: sp %lx pc %lx\n", (long)regs->sp, (long)regs->pc);
- memcpy(fpsimd->vregs, fpregs->vregs, 32 * sizeof(__uint128_t));
+ sigframe->uc.uc_mcontext.sp = regs->sp;
+ sigframe->uc.uc_mcontext.pc = regs->pc;
+ sigframe->uc.uc_mcontext.pstate = regs->pstate;
- fpsimd->fpsr = fpregs->fpsr;
- fpsimd->fpcr = fpregs->fpcr;
+ memcpy(fpsimd->vregs, fpregs->fpstate.vregs, 32 * sizeof(__uint128_t));
+
+ fpsimd->fpsr = fpregs->fpstate.fpsr;
+ fpsimd->fpcr = fpregs->fpstate.fpcr;
fpsimd->head.magic = FPSIMD_MAGIC;
fpsimd->head.size = sizeof(*fpsimd);
+ if (__compel_gcs_enabled(&fpregs->gcs)) {
+ gcs->head.magic = GCS_MAGIC;
+ gcs->head.size = sizeof(*gcs);
+ gcs->reserved = 0;
+ gcs->gcspr = fpregs->gcs.gcspr_el0 - 8;
+ gcs->features_enabled = fpregs->gcs.features_enabled;
+
+ pr_debug("sigframe gcspr=%llx features_enabled=%llx\n", fpregs->gcs.gcspr_el0 - 8, fpregs->gcs.features_enabled);
+ } else {
+ pr_debug("sigframe gcspr=[disabled]\n");
+ memset(gcs, 0, sizeof(*gcs));
+ }
+
return 0;
}
-int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe,
- struct rt_sigframe *rsigframe)
+int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe)
{
return 0;
}
-int get_task_regs(pid_t pid, user_regs_struct_t *regs, save_regs_t save,
- void *arg, __maybe_unused unsigned long flags)
+int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
+ void *arg, __maybe_unused unsigned long flags)
{
struct iovec iov;
- user_fpregs_struct_t fpsimd;
int ret;
pr_info("Dumping GP/FPU registers for %d\n", pid);
@@ -76,25 +106,79 @@ int get_task_regs(pid_t pid, user_regs_struct_t *regs, save_regs_t save,
goto err;
}
- iov.iov_base = &fpsimd;
- iov.iov_len = sizeof(fpsimd);
+ iov.iov_base = &ext_regs->fpstate;
+ iov.iov_len = sizeof(ext_regs->fpstate);
if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov))) {
pr_perror("Failed to obtain FPU registers for %d", pid);
goto err;
}
- ret = save(arg, regs, &fpsimd);
+ memset(&ext_regs->gcs, 0, sizeof(ext_regs->gcs));
+
+ iov.iov_base = &ext_regs->gcs;
+ iov.iov_len = sizeof(ext_regs->gcs);
+ if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &iov) == 0) {
+ pr_info("gcs: GCSPR_EL0 for %d: 0x%llx, features: 0x%llx\n",
+ pid, ext_regs->gcs.gcspr_el0, ext_regs->gcs.features_enabled);
+
+ if (!__compel_gcs_enabled(&ext_regs->gcs))
+ pr_info("gcs: GCS is NOT enabled\n");
+ } else {
+ pr_info("gcs: GCS state not available for %d\n", pid);
+ }
+
+ ret = save(pid, arg, regs, ext_regs);
err:
return ret;
}
-int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret,
- unsigned long arg1,
- unsigned long arg2,
- unsigned long arg3,
- unsigned long arg4,
- unsigned long arg5,
- unsigned long arg6)
+int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
+{
+ struct iovec iov;
+
+ struct cr_user_gcs gcs;
+ struct iovec gcs_iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) };
+
+ pr_info("Restoring GP/FPU registers for %d\n", pid);
+
+ iov.iov_base = &ext_regs->fpstate;
+ iov.iov_len = sizeof(ext_regs->fpstate);
+ if (ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov)) {
+ pr_perror("Failed to set FPU registers for %d", pid);
+ return -1;
+ }
+
+ if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) < 0) {
+ pr_warn("gcs: Failed to get GCS for %d\n", pid);
+ } else {
+ ext_regs->gcs = gcs;
+ compel_set_task_gcs_regs(pid, ext_regs);
+ }
+
+ return 0;
+}
+
+int compel_set_task_gcs_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
+{
+ struct iovec iov;
+
+ pr_info("gcs: restoring GCS registers for %d\n", pid);
+ pr_info("gcs: restoring GCS: gcspr=%llx features=%llx\n",
+ ext_regs->gcs.gcspr_el0, ext_regs->gcs.features_enabled);
+
+ iov.iov_base = &ext_regs->gcs;
+ iov.iov_len = sizeof(ext_regs->gcs);
+
+ if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_GCS, &iov)) {
+ pr_perror("gcs: Failed to set GCS registers for %d", pid);
+ return -1;
+ }
+
+ return 0;
+}
+
+int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6)
{
user_regs_struct_t regs = ctl->orig.regs;
int err;
@@ -115,15 +199,12 @@ int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret,
return err;
}
-void *remote_mmap(struct parasite_ctl *ctl,
- void *addr, size_t length, int prot,
- int flags, int fd, off_t offset)
+void *remote_mmap(struct parasite_ctl *ctl, void *addr, size_t length, int prot, int flags, int fd, off_t offset)
{
long map;
int err;
- err = compel_syscall(ctl, __NR_mmap, &map,
- (unsigned long)addr, length, prot, flags, fd, offset);
+ err = compel_syscall(ctl, __NR_mmap, &map, (unsigned long)addr, length, prot, flags, fd, offset);
if (err < 0 || (long)map < 0)
map = 0;
@@ -150,9 +231,7 @@ int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s)
long ret;
int err;
- err = compel_syscall(ctl, __NR_sigaltstack,
- &ret, 0, (unsigned long)&s->uc.uc_stack,
- 0, 0, 0, 0);
+ err = compel_syscall(ctl, __NR_sigaltstack, &ret, 0, (unsigned long)&s->uc.uc_stack, 0, 0, 0, 0);
return err ? err : ret;
}
@@ -176,3 +255,175 @@ unsigned long compel_task_size(void)
return task_size;
}
+static struct hwbp_cap *ptrace_get_hwbp_cap(pid_t pid)
+{
+ static struct hwbp_cap info;
+ static int available = -1;
+
+ if (available == -1) {
+ unsigned int val;
+ struct iovec iovec = {
+ .iov_base = &val,
+ .iov_len = sizeof(val),
+ };
+
+ if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_HW_BREAK, &iovec) < 0)
+ available = 0;
+ else {
+ info.arch = (char)((val >> 8) & 0xff);
+ info.bp_count = (char)(val & 0xff);
+
+ available = (info.arch != 0);
+ }
+ }
+
+ return available == 1 ? &info : NULL;
+}
+
+int ptrace_set_breakpoint(pid_t pid, void *addr)
+{
+	k_rtsigset_t block;
+	struct hwbp_cap *info = ptrace_get_hwbp_cap(pid);
+	struct user_hwdebug_state regs = {};
+	unsigned int ctrl = 0;
+	struct iovec iovec;
+
+	if (info == NULL || info->bp_count == 0)
+		return 0;
+
+	/*
+	 * The struct is copied from `arch/arm64/include/asm/hw_breakpoint.h` in
+	 * linux kernel:
+	 * struct arch_hw_breakpoint_ctrl {
+	 *	__u32 __reserved : 19,
+	 *	len : 8,
+	 *	type : 2,
+	 *	privilege : 2,
+	 *	enabled : 1;
+	 * };
+	 *
+	 * The part of `struct arch_hw_breakpoint_ctrl` bits meaning is defined
+	 * in the ARM Architecture Reference Manual (A-profile),
+	 * D13.3.2 DBGBCR<n>_EL1, Debug Breakpoint Control Registers.
+	 */
+	ctrl = ARM_BREAKPOINT_LEN_4;
+	ctrl = (ctrl << 2) | ARM_BREAKPOINT_EXECUTE;
+	ctrl = (ctrl << 2) | AARCH64_BREAKPOINT_EL0;
+	ctrl = (ctrl << 1) | ENABLE_HBP;
+	regs.dbg_regs[0].addr = (__u64)addr;
+	regs.dbg_regs[0].ctrl = ctrl;
+	iovec.iov_base = &regs;
+	iovec.iov_len = (offsetof(struct user_hwdebug_state, dbg_regs) + sizeof(regs.dbg_regs[0]));
+
+	if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_HW_BREAK, &iovec))
+		return -1;
+
+	/*
+	 * FIXME(issues/1429): SIGTRAP can't be blocked, otherwise its handler
+	 * will be reset to the default one.
+	 */
+	ksigfillset(&block);
+	ksigdelset(&block, SIGTRAP);
+	if (ptrace(PTRACE_SETSIGMASK, pid, sizeof(k_rtsigset_t), &block)) {
+		pr_perror("Can't block signals for %d", pid);
+		return -1;
+	}
+
+	if (ptrace(PTRACE_CONT, pid, NULL, NULL) != 0) {
+		pr_perror("Unable to restart the stopped tracee process %d", pid);
+		return -1;
+	}
+
+	return 1;
+}
+
+int ptrace_flush_breakpoints(pid_t pid)
+{
+	struct hwbp_cap *info = ptrace_get_hwbp_cap(pid);
+	struct user_hwdebug_state regs = {};
+	unsigned int ctrl = 0;
+	struct iovec iovec;
+
+	if (info == NULL || info->bp_count == 0)
+		return 0;
+
+	ctrl = ARM_BREAKPOINT_LEN_4;
+	ctrl = (ctrl << 2) | ARM_BREAKPOINT_EXECUTE;
+	ctrl = (ctrl << 2) | AARCH64_BREAKPOINT_EL0;
+	ctrl = (ctrl << 1) | DISABLE_HBP;
+	regs.dbg_regs[0].addr = 0ul;
+	regs.dbg_regs[0].ctrl = ctrl;
+
+	iovec.iov_base = &regs;
+	iovec.iov_len = (offsetof(struct user_hwdebug_state, dbg_regs) + sizeof(regs.dbg_regs[0]));
+
+	if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_HW_BREAK, &iovec))
+		return -1;
+
+	return 0;
+}
+
+int inject_gcs_cap_token(struct parasite_ctl *ctl, pid_t pid, struct cr_user_gcs *gcs)
+{
+ struct iovec gcs_iov = { .iov_base = gcs, .iov_len = sizeof(*gcs) };
+
+ uint64_t token_addr = gcs->gcspr_el0 - 8;
+ uint64_t sigtramp_addr = gcs->gcspr_el0 - 16;
+
+ uint64_t cap_token = ALIGN_DOWN(GCS_SIGNAL_CAP(token_addr), 8);
+ unsigned long restorer_addr;
+
+ pr_info("gcs: (setup) CAP token: 0x%lx at addr: 0x%lx\n", cap_token, token_addr);
+
+ /* Inject capability token at gcspr_el0 - 8 */
+ if (ptrace(PTRACE_POKEDATA, pid, (void *)token_addr, cap_token)) {
+ pr_perror("gcs: (setup) Inject GCS cap token failed");
+ return -1;
+ }
+
+ /* Inject restorer trampoline address (gcspr_el0 - 16) */
+ restorer_addr = ctl->parasite_ip;
+ if (ptrace(PTRACE_POKEDATA, pid, (void *)sigtramp_addr, restorer_addr)) {
+ pr_perror("gcs: (setup) Inject GCS restorer failed");
+ return -1;
+ }
+
+ /* Update GCSPR_EL0 */
+ gcs->gcspr_el0 = token_addr;
+ if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_GCS, &gcs_iov)) {
+ pr_perror("gcs: PTRACE_SETREGS FAILED");
+ return -1;
+ }
+
+ pr_debug("gcs: parasite_ip=%#lx sp=%#llx gcspr_el0=%#llx\n",
+ ctl->parasite_ip, ctl->orig.regs.sp, gcs->gcspr_el0);
+
+ return 0;
+}
+
+int parasite_setup_shstk(struct parasite_ctl *ctl, user_fpregs_struct_t *ext_regs)
+{
+ struct cr_user_gcs gcs;
+ struct iovec gcs_iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) };
+ pid_t pid = ctl->rpid;
+
+ if(!__compel_host_supports_gcs())
+ return 0;
+
+ if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) != 0) {
+ pr_perror("GCS state not available for %d", pid);
+ return -1;
+ }
+
+ if (!__compel_gcs_enabled(&gcs))
+ return 0;
+
+ if (inject_gcs_cap_token(ctl, pid, &gcs)) {
+ pr_perror("Failed to inject GCS cap token for %d", pid);
+ return -1;
+ }
+
+ pr_info("gcs: GCS enabled for %d\n", pid);
+
+ return 0;
+}
diff --git a/compel/arch/arm/plugins/include/asm/syscall-types.h b/compel/arch/arm/plugins/include/asm/syscall-types.h
index cdb03ef4c..acc03de52 100644
--- a/compel/arch/arm/plugins/include/asm/syscall-types.h
+++ b/compel/arch/arm/plugins/include/asm/syscall-types.h
@@ -1,7 +1,7 @@
#ifndef COMPEL_ARCH_SYSCALL_TYPES_H__
#define COMPEL_ARCH_SYSCALL_TYPES_H__
-#define SA_RESTORER 0x04000000
+#define SA_RESTORER 0x04000000
typedef void rt_signalfn_t(int, siginfo_t *, void *);
typedef rt_signalfn_t *rt_sighandler_t;
@@ -9,20 +9,20 @@ typedef rt_signalfn_t *rt_sighandler_t;
typedef void rt_restorefn_t(void);
typedef rt_restorefn_t *rt_sigrestore_t;
-#define _KNSIG 64
-#define _NSIG_BPW 32
+#define _KNSIG 64
+#define _NSIG_BPW 32
-#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW)
+#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW)
typedef struct {
unsigned long sig[_KNSIG_WORDS];
} k_rtsigset_t;
typedef struct {
- rt_sighandler_t rt_sa_handler;
- unsigned long rt_sa_flags;
- rt_sigrestore_t rt_sa_restorer;
- k_rtsigset_t rt_sa_mask;
+ rt_sighandler_t rt_sa_handler;
+ unsigned long rt_sa_flags;
+ rt_sigrestore_t rt_sa_restorer;
+ k_rtsigset_t rt_sa_mask;
} rt_sigaction_t;
#endif /* COMPEL_ARCH_SYSCALL_TYPES_H__ */
diff --git a/compel/arch/arm/plugins/std/parasite-head.S b/compel/arch/arm/plugins/std/parasite-head.S
index e72646b50..6e46bed1f 100644
--- a/compel/arch/arm/plugins/std/parasite-head.S
+++ b/compel/arch/arm/plugins/std/parasite-head.S
@@ -2,21 +2,7 @@
.section .head.text, "ax"
ENTRY(__export_parasite_head_start)
- sub r2, pc, #8 @ get the address of this instruction
-
- adr r0, __export_parasite_cmd
- ldr r0, [r0]
-
- adr r1, parasite_args_ptr
- ldr r1, [r1]
- add r1, r1, r2 @ fixup __export_parasite_args
-
bl parasite_service
.byte 0xf0, 0x01, 0xf0, 0xe7 @ the instruction UDF #32 generates the signal SIGTRAP in Linux
-parasite_args_ptr:
- .long __export_parasite_args
-
-__export_parasite_cmd:
- .long 0
END(__export_parasite_head_start)
diff --git a/compel/arch/arm/plugins/std/syscalls/syscall-aux.h b/compel/arch/arm/plugins/std/syscalls/syscall-aux.h
index 3d2056b5a..7418546e1 100644
--- a/compel/arch/arm/plugins/std/syscalls/syscall-aux.h
+++ b/compel/arch/arm/plugins/std/syscalls/syscall-aux.h
@@ -1,27 +1,27 @@
#ifndef __NR_mmap2
-# define __NR_mmap2 192
+#define __NR_mmap2 192
#endif
#ifndef __ARM_NR_BASE
-# define __ARM_NR_BASE 0x0f0000
+#define __ARM_NR_BASE 0x0f0000
#endif
#ifndef __ARM_NR_breakpoint
-# define __ARM_NR_breakpoint (__ARM_NR_BASE+1)
+#define __ARM_NR_breakpoint (__ARM_NR_BASE + 1)
#endif
#ifndef __ARM_NR_cacheflush
-# define __ARM_NR_cacheflush (__ARM_NR_BASE+2)
+#define __ARM_NR_cacheflush (__ARM_NR_BASE + 2)
#endif
#ifndef __ARM_NR_usr26
-# define __ARM_NR_usr26 (__ARM_NR_BASE+3)
+#define __ARM_NR_usr26 (__ARM_NR_BASE + 3)
#endif
#ifndef __ARM_NR_usr32
-# define __ARM_NR_usr32 (__ARM_NR_BASE+4)
+#define __ARM_NR_usr32 (__ARM_NR_BASE + 4)
#endif
#ifndef __ARM_NR_set_tls
-# define __ARM_NR_set_tls (__ARM_NR_BASE+5)
+#define __ARM_NR_set_tls (__ARM_NR_BASE + 5)
#endif
diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def
index 721ff16dc..f4deb02b2 100644
--- a/compel/arch/arm/plugins/std/syscalls/syscall.def
+++ b/compel/arch/arm/plugins/std/syscalls/syscall.def
@@ -39,7 +39,7 @@ recvfrom 207 292 (int sockfd, void *ubuf, size_t size, unsigned int flags, str
sendmsg 211 296 (int sockfd, const struct msghdr *msg, int flags)
recvmsg 212 297 (int sockfd, struct msghdr *msg, int flags)
shutdown 210 293 (int sockfd, int how)
-bind 235 282 (int sockfd, const struct sockaddr *addr, int addrlen)
+bind 200 282 (int sockfd, const struct sockaddr *addr, int addrlen)
setsockopt 208 294 (int sockfd, int level, int optname, const void *optval, socklen_t optlen)
getsockopt 209 295 (int sockfd, int level, int optname, const void *optval, socklen_t *optlen)
clone 220 120 (unsigned long flags, void *child_stack, void *parent_tid, unsigned long newtls, void *child_tid)
@@ -85,7 +85,7 @@ timer_settime 110 258 (kernel_timer_t timer_id, int flags, const struct itimer
timer_gettime 108 259 (int timer_id, const struct itimerspec *setting)
timer_getoverrun 109 260 (int timer_id)
timer_delete 111 261 (kernel_timer_t timer_id)
-clock_gettime 113 263 (const clockid_t which_clock, const struct timespec *tp)
+clock_gettime 113 263 (clockid_t which_clock, struct timespec *tp)
exit_group 94 248 (int error_code)
set_robust_list 99 338 (struct robust_list_head *head, size_t len)
get_robust_list 100 339 (int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
@@ -112,3 +112,16 @@ userfaultfd 282 388 (int flags)
fallocate 47 352 (int fd, int mode, loff_t offset, loff_t len)
cacheflush ! 983042 (void *start, void *end, int flags)
ppoll 73 336 (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize)
+open_tree 428 428 (int dirfd, const char *pathname, unsigned int flags)
+move_mount 429 429 (int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, int flags)
+fsopen 430 430 (char *fsname, unsigned int flags)
+fsconfig 431 431 (int fd, unsigned int cmd, const char *key, const char *value, int aux)
+fsmount 432 432 (int fd, unsigned int flags, unsigned int attr_flags)
+clone3 435 435 (struct clone_args *uargs, size_t size)
+close_range 436 436 (unsigned int fd, unsigned int max_fd, unsigned int flags)
+pidfd_open 434 434 (pid_t pid, unsigned int flags)
+openat2 437 437 (int dirfd, char *pathname, struct open_how *how, size_t size)
+pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags)
+rseq 293 398 (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+membarrier 283 389 (int cmd, unsigned int flags, int cpu_id)
+map_shadow_stack 453 ! (unsigned long addr, unsigned long size, unsigned int flags)
\ No newline at end of file
diff --git a/compel/arch/arm/scripts/compel-pack.lds.S b/compel/arch/arm/scripts/compel-pack.lds.S
index f8a4739f3..3d97bb139 100644
--- a/compel/arch/arm/scripts/compel-pack.lds.S
+++ b/compel/arch/arm/scripts/compel-pack.lds.S
@@ -29,8 +29,4 @@ SECTIONS
*(.eh_frame*)
*(*)
}
-
-/* Parasite args should have 4 bytes align, as we have futex inside. */
-. = ALIGN(4);
-__export_parasite_args = .;
}
diff --git a/compel/arch/arm/src/lib/handle-elf.c b/compel/arch/arm/src/lib/handle-elf.c
index 8abf8dad1..a84524abd 100644
--- a/compel/arch/arm/src/lib/handle-elf.c
+++ b/compel/arch/arm/src/lib/handle-elf.c
@@ -1,14 +1,12 @@
#include
-
-#include "uapi/compel.h"
+#include
#include "handle-elf.h"
#include "piegen.h"
#include "log.h"
-static const unsigned char __maybe_unused
-elf_ident_32[EI_NIDENT] = {
- 0x7f, 0x45, 0x4c, 0x46, 0x01, 0x01, 0x01, 0x00,
+static const unsigned char __maybe_unused elf_ident_32[EI_NIDENT] = {
+ 0x7f, 0x45, 0x4c, 0x46, 0x01, 0x01, 0x01, 0x00, /* clang-format */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
diff --git a/compel/arch/arm/src/lib/include/handle-elf.h b/compel/arch/arm/src/lib/include/handle-elf.h
index e5971f37b..4b5e1457a 100644
--- a/compel/arch/arm/src/lib/include/handle-elf.h
+++ b/compel/arch/arm/src/lib/include/handle-elf.h
@@ -3,8 +3,8 @@
#include "elf32-types.h"
-#define __handle_elf handle_elf_arm
-#define arch_is_machine_supported(e_machine) (e_machine == EM_ARM)
+#define __handle_elf handle_elf_arm
+#define arch_is_machine_supported(e_machine) (e_machine == EM_ARM)
extern int handle_elf_arm(void *mem, size_t size);
diff --git a/compel/arch/arm/src/lib/include/syscall.h b/compel/arch/arm/src/lib/include/syscall.h
index e2ec1272e..13ee906e1 100644
--- a/compel/arch/arm/src/lib/include/syscall.h
+++ b/compel/arch/arm/src/lib/include/syscall.h
@@ -1,4 +1,8 @@
#ifndef __COMPEL_SYSCALL_H__
#define __COMPEL_SYSCALL_H__
-#define __NR(syscall, compat) __NR_##syscall
+#define __NR(syscall, compat) \
+ ({ \
+ (void)compat; \
+ __NR_##syscall; \
+ })
#endif
diff --git a/compel/arch/arm/src/lib/include/uapi/asm/cpu.h b/compel/arch/arm/src/lib/include/uapi/asm/cpu.h
index c35460e15..12e749508 100644
--- a/compel/arch/arm/src/lib/include/uapi/asm/cpu.h
+++ b/compel/arch/arm/src/lib/include/uapi/asm/cpu.h
@@ -1,6 +1,7 @@
#ifndef UAPI_COMPEL_ASM_CPU_H__
#define UAPI_COMPEL_ASM_CPU_H__
-typedef struct { } compel_cpuinfo_t;
+typedef struct {
+} compel_cpuinfo_t;
#endif /* UAPI_COMPEL_ASM_CPU_H__ */
diff --git a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h
index b8286d404..8d328252e 100644
--- a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h
+++ b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h
@@ -4,8 +4,8 @@
#include
#include
-#define SIGMAX 64
-#define SIGMAX_OLD 31
+#define SIGMAX 64
+#define SIGMAX_OLD 31
/*
* Copied from the Linux kernel header arch/arm/include/asm/ptrace.h
@@ -14,53 +14,62 @@
*/
typedef struct {
- long uregs[18];
+ long uregs[18];
} user_regs_struct_t;
-typedef struct user_vfp user_fpregs_struct_t;
+#define __compel_arch_fetch_thread_area(tid, th) 0
+#define compel_arch_fetch_thread_area(tctl) 0
+#define compel_arch_get_tls_task(ctl, tls)
+#define compel_arch_get_tls_thread(tctl, tls)
-#define ARM_cpsr uregs[16]
-#define ARM_pc uregs[15]
-#define ARM_lr uregs[14]
-#define ARM_sp uregs[13]
-#define ARM_ip uregs[12]
-#define ARM_fp uregs[11]
-#define ARM_r10 uregs[10]
-#define ARM_r9 uregs[9]
-#define ARM_r8 uregs[8]
-#define ARM_r7 uregs[7]
-#define ARM_r6 uregs[6]
-#define ARM_r5 uregs[5]
-#define ARM_r4 uregs[4]
-#define ARM_r3 uregs[3]
-#define ARM_r2 uregs[2]
-#define ARM_r1 uregs[1]
-#define ARM_r0 uregs[0]
-#define ARM_ORIG_r0 uregs[17]
+typedef struct user_vfp user_fpregs_struct_t;
+#define ARM_cpsr uregs[16]
+#define ARM_pc uregs[15]
+#define ARM_lr uregs[14]
+#define ARM_sp uregs[13]
+#define ARM_ip uregs[12]
+#define ARM_fp uregs[11]
+#define ARM_r10 uregs[10]
+#define ARM_r9 uregs[9]
+#define ARM_r8 uregs[8]
+#define ARM_r7 uregs[7]
+#define ARM_r6 uregs[6]
+#define ARM_r5 uregs[5]
+#define ARM_r4 uregs[4]
+#define ARM_r3 uregs[3]
+#define ARM_r2 uregs[2]
+#define ARM_r1 uregs[1]
+#define ARM_r0 uregs[0]
+#define ARM_ORIG_r0 uregs[17]
/* Copied from arch/arm/include/asm/user.h */
struct user_vfp {
- unsigned long long fpregs[32];
- unsigned long fpscr;
+ unsigned long long fpregs[32];
+ unsigned long fpscr;
};
struct user_vfp_exc {
- unsigned long fpexc;
- unsigned long fpinst;
- unsigned long fpinst2;
+ unsigned long fpexc;
+ unsigned long fpinst;
+ unsigned long fpinst2;
};
-#define REG_RES(regs) ((regs).ARM_r0)
-#define REG_IP(regs) ((regs).ARM_pc)
-#define REG_SP(regs) ((regs).ARM_sp)
-#define REG_SYSCALL_NR(regs) ((regs).ARM_r7)
+#define REG_RES(regs) ((regs).ARM_r0)
+#define REG_IP(regs) ((regs).ARM_pc)
+#define SET_REG_IP(regs, val) ((regs).ARM_pc = (val))
+#define REG_SP(regs) ((regs).ARM_sp)
+#define REG_SYSCALL_NR(regs) ((regs).ARM_r7)
-#define user_regs_native(pregs) true
+#define user_regs_native(pregs) true
-#define ARCH_SI_TRAP TRAP_BRKPT
+#define ARCH_SI_TRAP TRAP_BRKPT
-#define __NR(syscall, compat) __NR_##syscall
+#define __NR(syscall, compat) \
+ ({ \
+ (void)compat; \
+ __NR_##syscall; \
+ })
#endif /* UAPI_COMPEL_ASM_TYPES_H__ */
diff --git a/compel/arch/arm/src/lib/include/uapi/asm/processor-flags.h b/compel/arch/arm/src/lib/include/uapi/asm/processor-flags.h
index 8745f4459..36edf231a 100644
--- a/compel/arch/arm/src/lib/include/uapi/asm/processor-flags.h
+++ b/compel/arch/arm/src/lib/include/uapi/asm/processor-flags.h
@@ -6,37 +6,37 @@
/*
* PSR bits
*/
-#define USR26_MODE 0x00000000
-#define FIQ26_MODE 0x00000001
-#define IRQ26_MODE 0x00000002
-#define SVC26_MODE 0x00000003
-#define USR_MODE 0x00000010
-#define FIQ_MODE 0x00000011
-#define IRQ_MODE 0x00000012
-#define SVC_MODE 0x00000013
-#define ABT_MODE 0x00000017
-#define UND_MODE 0x0000001b
-#define SYSTEM_MODE 0x0000001f
-#define MODE32_BIT 0x00000010
-#define MODE_MASK 0x0000001f
-#define PSR_T_BIT 0x00000020
-#define PSR_F_BIT 0x00000040
-#define PSR_I_BIT 0x00000080
-#define PSR_A_BIT 0x00000100
-#define PSR_E_BIT 0x00000200
-#define PSR_J_BIT 0x01000000
-#define PSR_Q_BIT 0x08000000
-#define PSR_V_BIT 0x10000000
-#define PSR_C_BIT 0x20000000
-#define PSR_Z_BIT 0x40000000
-#define PSR_N_BIT 0x80000000
+#define USR26_MODE 0x00000000
+#define FIQ26_MODE 0x00000001
+#define IRQ26_MODE 0x00000002
+#define SVC26_MODE 0x00000003
+#define USR_MODE 0x00000010
+#define FIQ_MODE 0x00000011
+#define IRQ_MODE 0x00000012
+#define SVC_MODE 0x00000013
+#define ABT_MODE 0x00000017
+#define UND_MODE 0x0000001b
+#define SYSTEM_MODE 0x0000001f
+#define MODE32_BIT 0x00000010
+#define MODE_MASK 0x0000001f
+#define PSR_T_BIT 0x00000020
+#define PSR_F_BIT 0x00000040
+#define PSR_I_BIT 0x00000080
+#define PSR_A_BIT 0x00000100
+#define PSR_E_BIT 0x00000200
+#define PSR_J_BIT 0x01000000
+#define PSR_Q_BIT 0x08000000
+#define PSR_V_BIT 0x10000000
+#define PSR_C_BIT 0x20000000
+#define PSR_Z_BIT 0x40000000
+#define PSR_N_BIT 0x80000000
/*
* Groups of PSR bits
*/
-#define PSR_f 0xff000000 /* Flags */
-#define PSR_s 0x00ff0000 /* Status */
-#define PSR_x 0x0000ff00 /* Extension */
-#define PSR_c 0x000000ff /* Control */
+#define PSR_f 0xff000000 /* Flags */
+#define PSR_s 0x00ff0000 /* Status */
+#define PSR_x 0x0000ff00 /* Extension */
+#define PSR_c 0x000000ff /* Control */
#endif
diff --git a/compel/arch/arm/src/lib/include/uapi/asm/sigframe.h b/compel/arch/arm/src/lib/include/uapi/asm/sigframe.h
index 23ada50aa..3db9978d0 100644
--- a/compel/arch/arm/src/lib/include/uapi/asm/sigframe.h
+++ b/compel/arch/arm/src/lib/include/uapi/asm/sigframe.h
@@ -6,42 +6,42 @@
/* Copied from the Linux kernel header arch/arm/include/asm/sigcontext.h */
struct rt_sigcontext {
- unsigned long trap_no;
- unsigned long error_code;
- unsigned long oldmask;
- unsigned long arm_r0;
- unsigned long arm_r1;
- unsigned long arm_r2;
- unsigned long arm_r3;
- unsigned long arm_r4;
- unsigned long arm_r5;
- unsigned long arm_r6;
- unsigned long arm_r7;
- unsigned long arm_r8;
- unsigned long arm_r9;
- unsigned long arm_r10;
- unsigned long arm_fp;
- unsigned long arm_ip;
- unsigned long arm_sp;
- unsigned long arm_lr;
- unsigned long arm_pc;
- unsigned long arm_cpsr;
- unsigned long fault_address;
+ unsigned long trap_no;
+ unsigned long error_code;
+ unsigned long oldmask;
+ unsigned long arm_r0;
+ unsigned long arm_r1;
+ unsigned long arm_r2;
+ unsigned long arm_r3;
+ unsigned long arm_r4;
+ unsigned long arm_r5;
+ unsigned long arm_r6;
+ unsigned long arm_r7;
+ unsigned long arm_r8;
+ unsigned long arm_r9;
+ unsigned long arm_r10;
+ unsigned long arm_fp;
+ unsigned long arm_ip;
+ unsigned long arm_sp;
+ unsigned long arm_lr;
+ unsigned long arm_pc;
+ unsigned long arm_cpsr;
+ unsigned long fault_address;
};
/* Copied from the Linux kernel header arch/arm/include/asm/ucontext.h */
-#define VFP_MAGIC 0x56465001
-#define VFP_STORAGE_SIZE sizeof(struct vfp_sigframe)
+#define VFP_MAGIC 0x56465001
+#define VFP_STORAGE_SIZE sizeof(struct vfp_sigframe)
struct vfp_sigframe {
- unsigned long magic;
- unsigned long size;
- struct user_vfp ufp;
- struct user_vfp_exc ufp_exc;
+ unsigned long magic;
+ unsigned long size;
+ struct user_vfp ufp;
+ struct user_vfp_exc ufp_exc;
};
-typedef struct vfp_sigframe fpu_state_t;
+typedef struct vfp_sigframe fpu_state_t;
struct aux_sigframe {
/*
@@ -49,23 +49,23 @@ struct aux_sigframe {
struct iwmmxt_sigframe iwmmxt;
*/
- struct vfp_sigframe vfp;
- unsigned long end_magic;
+ struct vfp_sigframe vfp;
+ unsigned long end_magic;
} __attribute__((aligned(8)));
#include
struct sigframe {
- struct rt_ucontext uc;
- unsigned long retcode[2];
+ struct rt_ucontext uc;
+ unsigned long retcode[2];
};
struct rt_sigframe {
- struct rt_siginfo info;
- struct sigframe sig;
+ struct rt_siginfo info;
+ struct sigframe sig;
};
-
+/* clang-format off */
#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe) \
asm volatile( \
"mov sp, %0 \n" \
@@ -74,17 +74,16 @@ struct rt_sigframe {
: \
: "r"(new_sp) \
: "memory")
+/* clang-format on */
-#define RT_SIGFRAME_UC(rt_sigframe) (&rt_sigframe->sig.uc)
-#define RT_SIGFRAME_REGIP(rt_sigframe) (rt_sigframe)->sig.uc.uc_mcontext.arm_ip
-#define RT_SIGFRAME_HAS_FPU(rt_sigframe) 1
-#define RT_SIGFRAME_AUX_SIGFRAME(rt_sigframe) ((struct aux_sigframe *)&(rt_sigframe)->sig.uc.uc_regspace)
-#define RT_SIGFRAME_FPU(rt_sigframe) (&RT_SIGFRAME_AUX_SIGFRAME(rt_sigframe)->vfp)
-#define RT_SIGFRAME_OFFSET(rt_sigframe) 0
+#define RT_SIGFRAME_UC(rt_sigframe) (&rt_sigframe->sig.uc)
+#define RT_SIGFRAME_REGIP(rt_sigframe) (rt_sigframe)->sig.uc.uc_mcontext.arm_ip
+#define RT_SIGFRAME_HAS_FPU(rt_sigframe) 1
+#define RT_SIGFRAME_AUX_SIGFRAME(rt_sigframe) ((struct aux_sigframe *)&(rt_sigframe)->sig.uc.uc_regspace)
+#define RT_SIGFRAME_FPU(rt_sigframe) (&RT_SIGFRAME_AUX_SIGFRAME(rt_sigframe)->vfp)
+#define RT_SIGFRAME_OFFSET(rt_sigframe) 0
-#define rt_sigframe_erase_sigset(sigframe) \
- memset(&sigframe->sig.uc.uc_sigmask, 0, sizeof(k_rtsigset_t))
-#define rt_sigframe_copy_sigset(sigframe, from) \
- memcpy(&sigframe->sig.uc.uc_sigmask, from, sizeof(k_rtsigset_t))
+#define rt_sigframe_erase_sigset(sigframe) memset(&sigframe->sig.uc.uc_sigmask, 0, sizeof(k_rtsigset_t))
+#define rt_sigframe_copy_sigset(sigframe, from) memcpy(&sigframe->sig.uc.uc_sigmask, from, sizeof(k_rtsigset_t))
#endif /* UAPI_COMPEL_ASM_SIGFRAME_H__ */
diff --git a/compel/arch/arm/src/lib/infect.c b/compel/arch/arm/src/lib/infect.c
index c17cb9c9b..a9fb639e2 100644
--- a/compel/arch/arm/src/lib/infect.c
+++ b/compel/arch/arm/src/lib/infect.c
@@ -1,8 +1,11 @@
#include
#include
#include
+#include
#include
#include
+#include
+
#include "common/page.h"
#include "uapi/compel/asm/infect-types.h"
#include "log.h"
@@ -14,12 +17,11 @@
* Injected syscall instruction
*/
const char code_syscall[] = {
- 0x00, 0x00, 0x00, 0xef, /* SVC #0 */
- 0xf0, 0x01, 0xf0, 0xe7 /* UDF #32 */
+ 0x00, 0x00, 0x00, 0xef, /* SVC #0 */
+ 0xf0, 0x01, 0xf0, 0xe7 /* UDF #32 */
};
-static const int
-code_syscall_aligned = round_up(sizeof(code_syscall), sizeof(long));
+static const int code_syscall_aligned = round_up(sizeof(code_syscall), sizeof(long));
static inline __always_unused void __check_code_syscall(void)
{
@@ -27,9 +29,7 @@ static inline __always_unused void __check_code_syscall(void)
BUILD_BUG_ON(!is_log2(sizeof(code_syscall)));
}
-int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe,
- user_regs_struct_t *regs,
- user_fpregs_struct_t *fpregs)
+int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
{
struct aux_sigframe *aux = (struct aux_sigframe *)(void *)&sigframe->sig.uc.uc_regspace;
@@ -59,22 +59,20 @@ int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe,
return 0;
}
-int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe,
- struct rt_sigframe *rsigframe)
+int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe)
{
return 0;
}
#define PTRACE_GETVFPREGS 27
-int get_task_regs(pid_t pid, user_regs_struct_t *regs, save_regs_t save,
- void *arg, __maybe_unused unsigned long flags)
+int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *vfp, save_regs_t save,
+ void *arg, __maybe_unused unsigned long flags)
{
- user_fpregs_struct_t vfp;
int ret = -1;
pr_info("Dumping GP/FPU registers for %d\n", pid);
- if (ptrace(PTRACE_GETVFPREGS, pid, NULL, &vfp)) {
+ if (ptrace(PTRACE_GETVFPREGS, pid, NULL, vfp)) {
pr_perror("Can't obtain FPU registers for %d", pid);
goto err;
}
@@ -90,24 +88,30 @@ int get_task_regs(pid_t pid, user_regs_struct_t *regs, save_regs_t save,
regs->ARM_pc -= 4;
break;
case -ERESTART_RESTARTBLOCK:
- regs->ARM_r0 = __NR_restart_syscall;
- regs->ARM_pc -= 4;
+ pr_warn("Will restore %d with interrupted system call\n", pid);
+ regs->ARM_r0 = -EINTR;
break;
}
}
- ret = save(arg, regs, &vfp);
+ ret = save(pid, arg, regs, vfp);
err:
return ret;
}
-int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret,
- unsigned long arg1,
- unsigned long arg2,
- unsigned long arg3,
- unsigned long arg4,
- unsigned long arg5,
- unsigned long arg6)
+int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
+{
+	/* Hand ext_regs to the task via PTRACE_SETVFPREGS: 0 on success, -1 on failure. */
+	pr_info("Restoring GP/FPU registers for %d\n", pid);
+	if (!ptrace(PTRACE_SETVFPREGS, pid, NULL, ext_regs))
+		return 0;
+	/* ptrace() failed: log it and report the error to the caller. */
+	pr_perror("Can't set FPU registers for %d", pid);
+	return -1;
+}
+
+int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6)
{
user_regs_struct_t regs = ctl->orig.regs;
int err;
@@ -126,9 +130,7 @@ int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret,
return err;
}
-void *remote_mmap(struct parasite_ctl *ctl,
- void *addr, size_t length, int prot,
- int flags, int fd, off_t offset)
+void *remote_mmap(struct parasite_ctl *ctl, void *addr, size_t length, int prot, int flags, int fd, off_t offset)
{
long map;
int err;
@@ -136,8 +138,7 @@ void *remote_mmap(struct parasite_ctl *ctl,
if (offset & ~PAGE_MASK)
return 0;
- err = compel_syscall(ctl, __NR_mmap2, &map,
- (unsigned long)addr, length, prot, flags, fd, offset >> 12);
+ err = compel_syscall(ctl, __NR_mmap2, &map, (unsigned long)addr, length, prot, flags, fd, offset >> 12);
if (err < 0 || map > ctl->ictx.task_size)
map = 0;
@@ -167,9 +168,7 @@ int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s)
long ret;
int err;
- err = compel_syscall(ctl, __NR_sigaltstack,
- &ret, 0, (unsigned long)&s->sig.uc.uc_stack,
- 0, 0, 0, 0);
+ err = compel_syscall(ctl, __NR_sigaltstack, &ret, 0, (unsigned long)&s->sig.uc.uc_stack, 0, 0, 0, 0);
return err ? err : ret;
}
@@ -178,9 +177,9 @@ int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s)
* arch/arm/include/asm/memory.h
* arch/arm/Kconfig (PAGE_OFFSET values in Memory split section)
*/
-#define TASK_SIZE_MIN 0x3f000000
-#define TASK_SIZE_MAX 0xbf000000
-#define SZ_1G 0x40000000
+#define TASK_SIZE_MIN 0x3f000000
+#define TASK_SIZE_MAX 0xbf000000
+#define SZ_1G 0x40000000
unsigned long compel_task_size(void)
{
@@ -192,4 +191,3 @@ unsigned long compel_task_size(void)
return task_size;
}
-
diff --git a/compel/arch/loongarch64/plugins/include/asm/prologue.h b/compel/arch/loongarch64/plugins/include/asm/prologue.h
new file mode 100644
index 000000000..c19ce54d7
--- /dev/null
+++ b/compel/arch/loongarch64/plugins/include/asm/prologue.h
@@ -0,0 +1,35 @@
+#ifndef __ASM_PROLOGUE_H__
+#define __ASM_PROLOGUE_H__
+
+#ifndef __ASSEMBLY__
+
+#include
+#include
+#include
+
+#include
+
+#define sys_recv(sockfd, ubuf, size, flags) sys_recvfrom(sockfd, ubuf, size, flags, NULL, NULL)
+
+typedef struct prologue_init_args {
+ struct sockaddr_un ctl_sock_addr;
+ unsigned int ctl_sock_addr_len;
+
+ unsigned int arg_s;
+ void *arg_p;
+
+ void *sigframe;
+} prologue_init_args_t;
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Reserve enough space for sigframe.
+ *
+ * FIXME: this should rather be taken from the sigframe header.
+ */
+#define PROLOGUE_SGFRAME_SIZE 4096
+
+#define PROLOGUE_INIT_ARGS_SIZE 1024
+
+#endif /* __ASM_PROLOGUE_H__ */
diff --git a/compel/arch/loongarch64/plugins/include/asm/syscall-types.h b/compel/arch/loongarch64/plugins/include/asm/syscall-types.h
new file mode 100644
index 000000000..b883bd8be
--- /dev/null
+++ b/compel/arch/loongarch64/plugins/include/asm/syscall-types.h
@@ -0,0 +1,30 @@
+#ifndef COMPEL_ARCH_SYSCALL_TYPES_H__
+#define COMPEL_ARCH_SYSCALL_TYPES_H__
+
+#include
+/* Types for sigaction, sigprocmask syscalls */
+typedef void rt_signalfn_t(int, siginfo_t *, void *);
+typedef rt_signalfn_t *rt_sighandler_t;
+
+typedef void rt_restorefn_t(void);
+typedef rt_restorefn_t *rt_sigrestore_t;
+
+/* refer to arch/loongarch/include/uapi/asm/signal.h */
+#define _KNSIG 64
+#define _NSIG_BPW BITS_PER_LONG
+#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW)
+
+typedef struct {
+ uint64_t sig[_KNSIG_WORDS];
+} k_rtsigset_t;
+
+typedef struct {
+ rt_sighandler_t rt_sa_handler;
+ unsigned long rt_sa_flags;
+ rt_sigrestore_t rt_sa_restorer;
+ k_rtsigset_t rt_sa_mask;
+} rt_sigaction_t;
+
+#define SA_RESTORER 0x04000000
+
+#endif /* COMPEL_ARCH_SYSCALL_TYPES_H__ */
diff --git a/compel/arch/loongarch64/plugins/include/features.h b/compel/arch/loongarch64/plugins/include/features.h
new file mode 100644
index 000000000..b4a3cded2
--- /dev/null
+++ b/compel/arch/loongarch64/plugins/include/features.h
@@ -0,0 +1,4 @@
+#ifndef __COMPEL_ARCH_FEATURES_H
+#define __COMPEL_ARCH_FEATURES_H
+
+#endif /* __COMPEL_ARCH_FEATURES_H */
diff --git a/compel/arch/loongarch64/plugins/std/parasite-head.S b/compel/arch/loongarch64/plugins/std/parasite-head.S
new file mode 100644
index 000000000..3a960490e
--- /dev/null
+++ b/compel/arch/loongarch64/plugins/std/parasite-head.S
@@ -0,0 +1,9 @@
+
+#include "common/asm/linkage.h"
+
+ .section .head.text, "ax"
+ENTRY(__export_parasite_head_start)
+ bl parasite_service; /* branch-and-link into parasite_service */
+ break 0; /* executed once parasite_service returns: raise a breakpoint trap */
+END(__export_parasite_head_start)
+
diff --git a/compel/arch/loongarch64/plugins/std/syscalls/Makefile.syscalls b/compel/arch/loongarch64/plugins/std/syscalls/Makefile.syscalls
new file mode 100644
index 000000000..0d08f34e1
--- /dev/null
+++ b/compel/arch/loongarch64/plugins/std/syscalls/Makefile.syscalls
@@ -0,0 +1,116 @@
+std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/syscalls-64.o
+sys-proto-types := $(obj)/include/uapi/std/syscall-types.h
+sys-proto-generic := $(obj)/include/uapi/std/syscall.h
+sys-codes-generic := $(obj)/include/uapi/std/syscall-codes.h
+sys-codes = $(obj)/include/uapi/std/syscall-codes-$(1).h
+sys-proto = $(obj)/include/uapi/std/syscall-$(1).h
+sys-def = $(PLUGIN_ARCH_DIR)/std/syscalls/syscall_$(1).tbl
+sys-asm = $(PLUGIN_ARCH_DIR)/std/syscalls-$(1).S
+sys-asm-common-name = std/syscalls/syscall-common-loongarch-$(1).S
+sys-asm-common = $(PLUGIN_ARCH_DIR)/$(sys-asm-common-name)
+sys-asm-types := $(obj)/include/uapi/std/asm/syscall-types.h
+sys-exec-tbl = $(PLUGIN_ARCH_DIR)/std/sys-exec-tbl-$(1).c
+
+# Only the 64-bit syscall table is instantiated by the eval calls below.
+sys-bits := 64
+
+# AV holds an escaped dollar sign so awk field references survive make's expansion of the templates below.
+AV := $$$$
+
+# Template: generate syscall-codes-<bits>.h (__NR_* and matching SYS_* defines) from the syscall table.
+define gen-rule-sys-codes
+$(sys-codes): $(sys-def) $(sys-proto-types)
+	$(call msg-gen, $$@)
+	$(Q) echo "/* Autogenerated, don't edit */" > $$@
+	$(Q) echo "#ifndef ASM_SYSCALL_CODES_H_$(1)__" >> $$@
+	$(Q) echo "#define ASM_SYSCALL_CODES_H_$(1)__" >> $$@
+	$(Q) cat $$< | awk '/^__NR/{SYSN=$(AV)1; \
+		sub("^__NR", "SYS", SYSN); \
+		print "\n#ifndef ", $(AV)1; \
+		print "#define", $(AV)1, $(AV)2; \
+		print "#endif"; \
+		print "\n#ifndef ", SYSN; \
+		print "#define ", SYSN, $(AV)1; \
+		print "#endif";}' >> $$@
+	$(Q) echo "#endif /* ASM_SYSCALL_CODES_H_$(1)__ */" >> $$@
+endef
+
+# Template: generate syscall-<bits>.h with one extern prototype per table entry.
+define gen-rule-sys-proto
+$(sys-proto): $(sys-def) $(sys-proto-types)
+	$(call msg-gen, $$@)
+	$(Q) echo "/* Autogenerated, don't edit */" > $$@
+	$(Q) echo "#ifndef ASM_SYSCALL_PROTO_H_$(1)__" >> $$@
+	$(Q) echo "#define ASM_SYSCALL_PROTO_H_$(1)__" >> $$@
+	$(Q) echo '#include ' >> $$@
+	$(Q) echo '#include ' >> $$@
+ifeq ($(1),32)
+	$(Q) echo '#include "asm/syscall32.h"' >> $$@
+endif
+	$(Q) cat $$< | awk '/^__NR/{print "extern long", $(AV)3, \
+		substr($(AV)0, index($(AV)0,$(AV)4)), ";"}' >> $$@
+	$(Q) echo "#endif /* ASM_SYSCALL_PROTO_H_$(1)__ */" >> $$@
+endef
+
+# Template: generate syscalls-<bits>.S with one SYSCALL(name, code) stub per table entry.
+define gen-rule-sys-asm
+$(sys-asm): $(sys-def) $(sys-asm-common) $(sys-codes) $(sys-proto) $(sys-proto-types)
+	$(call msg-gen, $$@)
+	$(Q) echo "/* Autogenerated, don't edit */" > $$@
+	$(Q) echo '#include ' >> $$@
+	$(Q) echo '#include "$(sys-asm-common-name)"' >> $$@
+	$(Q) cat $$< | awk '/^__NR/{print "SYSCALL(", $(AV)3, ",", $(AV)2, ")"}' >> $$@
+endef
+
+# Template: generate sys-exec-tbl-<bits>.c; substr(name, 5) drops the first four characters of each name.
+define gen-rule-sys-exec-tbl
+$(sys-exec-tbl): $(sys-def) $(sys-codes) $(sys-proto) $(sys-proto-generic) $(sys-proto-types)
+	$(call msg-gen, $$@)
+	$(Q) echo "/* Autogenerated, don't edit */" > $$@
+	$(Q) cat $$< | awk '/^__NR/{print \
+		"SYSCALL(", substr($(AV)3, 5), ",", $(AV)2, ")"}' >> $$@
+endef
+
+$(sys-codes-generic): $(sys-proto-types)
+	$(call msg-gen, $@)
+	$(Q) echo "/* Autogenerated, don't edit */" > $@
+	$(Q) echo "#ifndef __ASM_CR_SYSCALL_CODES_H__" >> $@
+	$(Q) echo "#define __ASM_CR_SYSCALL_CODES_H__" >> $@
+	$(Q) echo '#include ' >> $@
+	$(Q) cat $< | awk '/^__NR/{NR32=$$1; \
+		sub("^__NR", "__NR32", NR32); \
+		print "\n#ifndef ", NR32; \
+		print "#define ", NR32, $$2; \
+		print "#endif";}' >> $@
+	$(Q) echo "#endif /* __ASM_CR_SYSCALL_CODES_H__ */" >> $@
+mrproper-y += $(sys-codes-generic)
+
+$(sys-proto-generic): $(strip $(call map,sys-proto,$(sys-bits))) $(sys-proto-types)
+	$(call msg-gen, $@)
+	$(Q) echo "/* Autogenerated, don't edit */" > $@
+	$(Q) echo "#ifndef __ASM_CR_SYSCALL_PROTO_H__" >> $@
+	$(Q) echo "#define __ASM_CR_SYSCALL_PROTO_H__" >> $@
+	$(Q) echo "" >> $@
+	$(Q) echo '#include ' >> $@
+	$(Q) echo "" >> $@
+	$(Q) echo "#endif /* __ASM_CR_SYSCALL_PROTO_H__ */" >> $@
+mrproper-y += $(sys-proto-generic)
+
+$(eval $(call map,gen-rule-sys-codes,$(sys-bits)))
+$(eval $(call map,gen-rule-sys-proto,$(sys-bits)))
+$(eval $(call map,gen-rule-sys-asm,$(sys-bits)))
+$(eval $(call map,gen-rule-sys-exec-tbl,$(sys-bits)))
+
+# Publish the arch syscall-types.h inside the generated uapi tree through a relative symlink.
+$(sys-asm-types): $(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h
+	$(call msg-gen, $@)
+	$(Q) ln -s ../../../../../../$(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h $(sys-asm-types)
+
+std-headers-deps += $(call sys-codes,$(sys-bits))
+std-headers-deps += $(call sys-proto,$(sys-bits))
+std-headers-deps += $(call sys-asm,$(sys-bits))
+std-headers-deps += $(call sys-exec-tbl,$(sys-bits))
+std-headers-deps += $(sys-codes-generic)
+std-headers-deps += $(sys-proto-generic)
+std-headers-deps += $(sys-asm-types)
+mrproper-y += $(std-headers-deps)
diff --git a/compel/arch/loongarch64/plugins/std/syscalls/syscall-common-loongarch-64.S b/compel/arch/loongarch64/plugins/std/syscalls/syscall-common-loongarch-64.S
new file mode 100644
index 000000000..fff894466
--- /dev/null
+++ b/compel/arch/loongarch64/plugins/std/syscalls/syscall-common-loongarch-64.S
@@ -0,0 +1,44 @@
+#include "common/asm/linkage.h"
+
+#define SYSCALL(name, opcode) \
+ENTRY(name); \
+ addi.d $a7, $zero, opcode; /* load the syscall code into $a7 */ \
+ syscall 0; /* enter the kernel */ \
+ jirl $r0, $r1, 0; /* jump to the address in $r1, i.e. return to the caller */ \
+END(name)
+
+#ifndef AT_FDCWD
+#define AT_FDCWD -100
+#endif
+
+#ifndef AT_REMOVEDIR
+#define AT_REMOVEDIR 0x200
+#endif
+
+ENTRY(sys_open)
+ or $a3, $zero, $a2 /* 3rd argument moves to the 4th slot (openat mode) */
+ or $a2, $zero, $a1 /* 2nd argument moves to the 3rd slot (openat flags) */
+ or $a1, $zero, $a0 /* 1st argument moves to the 2nd slot (openat filename) */
+ addi.d $a0, $zero, AT_FDCWD /* 1st slot (dfd) = AT_FDCWD */
+ b sys_openat /* plain branch, $r1 untouched: the openat stub returns to our caller */
+END(sys_open)
+
+ENTRY(sys_mkdir)
+ or $a3, $zero, $a2 /* NOTE(review): sys_mkdirat is declared with 3 arguments, so this copy looks redundant - confirm */
+ or $a2, $zero, $a1 /* 2nd argument moves to the 3rd slot */
+ or $a1, $zero, $a0 /* 1st argument moves to the 2nd slot (mkdirat pathname) */
+ addi.d $a0, $zero, AT_FDCWD /* 1st slot (dfd) = AT_FDCWD */
+ b sys_mkdirat /* plain branch, $r1 untouched: the mkdirat stub returns to our caller */
+END(sys_mkdir)
+
+ENTRY(sys_rmdir)
+ addi.d $a2, $zero, AT_REMOVEDIR /* 3rd slot (unlinkat flag) = AT_REMOVEDIR */
+ or $a1, $zero, $a0 /* 1st argument moves to the 2nd slot (unlinkat pathname) */
+ addi.d $a0, $zero, AT_FDCWD /* 1st slot (dfd) = AT_FDCWD */
+ b sys_unlinkat /* plain branch, $r1 untouched: the unlinkat stub returns to our caller */
+END(sys_rmdir)
+
+ENTRY(__cr_restore_rt)
+ addi.d $a7, $zero, __NR_rt_sigreturn /* $a7 = rt_sigreturn syscall code */
+ syscall 0 /* note: no return instruction follows this syscall */
+END(__cr_restore_rt)
diff --git a/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl b/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl
new file mode 100644
index 000000000..83dcdab4a
--- /dev/null
+++ b/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl
@@ -0,0 +1,122 @@
+#
+# System calls table, please make sure the table consist only the syscalls
+# really used somewhere in project.
+# Codes follow the generic Linux syscall numbering (include/uapi/asm-generic/unistd.h), which LoongArch uses.
+#
+# __NR_name code name arguments
+# -------------------------------------------------------------------------------------------------------------------------------------------------------------
+__NR_io_setup 0 sys_io_setup (unsigned nr_events, aio_context_t *ctx)
+__NR_io_submit 2 sys_io_submit (aio_context_t ctx, long nr, struct iocb **iocbpp)
+__NR_io_getevents 4 sys_io_getevents (aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo)
+__NR_fcntl 25 sys_fcntl (int fd, int type, long arg)
+__NR_ioctl 29 sys_ioctl (unsigned int fd, unsigned int cmd, unsigned long arg)
+__NR_flock 32 sys_flock (int fd, unsigned long cmd)
+__NR_mkdirat 34 sys_mkdirat (int dfd, const char *pathname, int flag)
+__NR_unlinkat 35 sys_unlinkat (int dfd, const char *pathname, int flag)
+__NR_umount2 39 sys_umount2 (char *name, int flags)
+__NR_mount 40 sys_mount (char *dev_nmae, char *dir_name, char *type, unsigned long flags, void *data)
+__NR_fallocate 47 sys_fallocate (int fd, int mode, loff_t offset, loff_t len)
+__NR_close 57 sys_close (int fd)
+__NR_openat 56 sys_openat (int dfd, const char *filename, int flags, int mode)
+__NR_lseek 62 sys_lseek (int fd, unsigned long offset, unsigned long origin)
+__NR_read 63 sys_read (int fd, void *buf, unsigned long count)
+__NR_write 64 sys_write (int fd, const void *buf, unsigned long count)
+__NR_pread64 67 sys_pread (unsigned int fd, char *buf, size_t count, loff_t pos)
+__NR_preadv 69 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h)
+__NR_ppoll 73 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize)
+__NR_signalfd4 74 sys_signalfd (int fd, k_rtsigset_t *mask, size_t sizemask, int flags)
+__NR_vmsplice 75 sys_vmsplice (int fd, const struct iovec *iov, unsigned long nr_segs, unsigned int flags)
+__NR_readlinkat 78 sys_readlinkat (int fd, const char *path, char *buf, int bufsize)
+__NR_timerfd_settime 86 sys_timerfd_settime (int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr)
+__NR_capget 90 sys_capget (struct cap_header *h, struct cap_data *d)
+__NR_capset 91 sys_capset (struct cap_header *h, struct cap_data *d)
+__NR_personality 92 sys_personality (unsigned int personality)
+__NR_exit 93 sys_exit (unsigned long error_code)
+__NR_exit_group 94 sys_exit_group (int error_code)
+__NR_waitid 95 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
+__NR_set_tid_address 96 sys_set_tid_address (int *tid_addr)
+__NR_futex 98 sys_futex (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3)
+__NR_set_robust_list 99 sys_set_robust_list (struct robust_list_head *head, size_t len)
+__NR_get_robust_list 100 sys_get_robust_list (int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
+__NR_nanosleep 101 sys_nanosleep (struct timespec *req, struct timespec *rem)
+__NR_getitimer 102 sys_getitimer (int which, const struct itimerval *val)
+__NR_setitimer 103 sys_setitimer (int which, const struct itimerval *val, struct itimerval *old)
+__NR_sys_timer_create 107 sys_timer_create (clockid_t which_clock, struct sigevent *timer_event_spec, kernel_timer_t *created_timer_id)
+__NR_sys_timer_gettime 108 sys_timer_gettime (int timer_id, const struct itimerspec *setting)
+__NR_sys_timer_getoverrun 109 sys_timer_getoverrun (int timer_id)
+__NR_sys_timer_settime 110 sys_timer_settime (kernel_timer_t timer_id, int flags, const struct itimerspec *new_setting, struct itimerspec *old_setting)
+__NR_sys_timer_delete 111 sys_timer_delete (kernel_timer_t timer_id)
+__NR_clock_gettime 113 sys_clock_gettime (clockid_t which_clock, struct timespec *tp)
+__NR_sched_setscheduler 119 sys_sched_setscheduler (int pid, int policy, struct sched_param *p)
+__NR_restart_syscall 128 sys_restart_syscall (void)
+__NR_kill 129 sys_kill (long pid, int sig)
+__NR_sigaltstack 132 sys_sigaltstack (const void *uss, void *uoss)
+__NR_rt_sigaction 134 sys_sigaction (int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact, size_t sigsetsize)
+__NR_rt_sigprocmask 135 sys_sigprocmask (int how, k_rtsigset_t *set, k_rtsigset_t *old, size_t sigsetsize)
+__NR_rt_sigqueueinfo 138 sys_rt_sigqueueinfo (pid_t pid, int sig, siginfo_t *info)
+__NR_rt_sigreturn 139 sys_rt_sigreturn (void)
+__NR_setpriority 140 sys_setpriority (int which, int who, int nice)
+__NR_setresuid 147 sys_setresuid (int uid, int euid, int suid)
+__NR_getresuid 148 sys_getresuid (int *uid, int *euid, int *suid)
+__NR_setresgid 149 sys_setresgid (int gid, int egid, int sgid)
+__NR_getresgid 150 sys_getresgid (int *gid, int *egid, int *sgid)
+__NR_getpgid 155 sys_getpgid (pid_t pid)
+__NR_setfsuid 151 sys_setfsuid (int fsuid)
+__NR_setfsgid 152 sys_setfsgid (int fsgid)
+__NR_getsid 156 sys_getsid (void)
+__NR_getgroups 158 sys_getgroups (int gsize, unsigned int *groups)
+__NR_setgroups 159 sys_setgroups (int gsize, unsigned int *groups)
+__NR_setrlimit 164 sys_setrlimit (int resource, struct krlimit *rlim)
+__NR_umask 166 sys_umask (int mask)
+__NR_prctl 167 sys_prctl (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5)
+__NR_gettimeofday 169 sys_gettimeofday (struct timeval *tv, struct timezone *tz)
+__NR_getpid 172 sys_getpid (void)
+__NR_ptrace 117 sys_ptrace (long request, pid_t pid, void *addr, void *data)
+__NR_gettid 178 sys_gettid (void)
+__NR_shmat 196 sys_shmat (int shmid, void *shmaddr, int shmflag)
+__NR_socket 198 sys_socket (int domain, int type, int protocol)
+__NR_bind 200 sys_bind (int sockfd, const struct sockaddr *addr, int addrlen)
+__NR_connect 203 sys_connect (int sockfd, struct sockaddr *addr, int addrlen)
+__NR_sendto 206 sys_sendto (int sockfd, void *buff, size_t len, unsigned int flags, struct sockaddr *addr, int addr_len)
+__NR_recvfrom 207 sys_recvfrom (int sockfd, void *ubuf, size_t size, unsigned int flags, struct sockaddr *addr, int *addr_len)
+__NR_setsockopt 208 sys_setsockopt (int sockfd, int level, int optname, const void *optval, socklen_t optlen)
+__NR_getsockopt 209 sys_getsockopt (int sockfd, int level, int optname, const void *optval, socklen_t *optlen)
+__NR_shutdown 210 sys_shutdown (int sockfd, int how)
+__NR_sendmsg 211 sys_sendmsg (int sockfd, const struct msghdr *msg, int flags)
+__NR_recvmsg 212 sys_recvmsg (int sockfd, struct msghdr *msg, int flags)
+__NR_brk 214 sys_brk (void *addr)
+__NR_munmap 215 sys_munmap (void *addr, unsigned long len)
+__NR_mremap 216 sys_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr)
+__NR_clone 220 sys_clone (unsigned long flags, void *child_stack, void *parent_tid, unsigned long newtls, void *child_tid)
+__NR_mmap 222 sys_mmap (void *addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long offset)
+__NR_mprotect 226 sys_mprotect (const void *addr, unsigned long len, unsigned long prot)
+__NR_mincore 232 sys_mincore (void *addr, unsigned long size, unsigned char *vec)
+__NR_madvise 233 sys_madvise (unsigned long start, size_t len, int behavior)
+__NR_rt_tgsigqueueinfo 240 sys_rt_tgsigqueueinfo (pid_t tgid, pid_t pid, int sig, siginfo_t *info)
+__NR_wait4 260 sys_wait4 (int pid, int *status, int options, struct rusage *ru)
+__NR_fanotify_init 262 sys_fanotify_init (unsigned int flags, unsigned int event_f_flags)
+__NR_fanotify_mark 263 sys_fanotify_mark (int fanotify_fd, unsigned int flags, uint64_t mask, int dfd, const char *pathname)
+__NR_open_by_handle_at 265 sys_open_by_handle_at (int mountdirfd, struct file_handle *handle, int flags)
+__NR_setns 268 sys_setns (int fd, int nstype)
+__NR_kcmp 272 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
+__NR_seccomp 277 sys_seccomp (unsigned int op, unsigned int flags, const char *uargs)
+__NR_memfd_create 279 sys_memfd_create (const char *name, unsigned int flags)
+__NR_userfaultfd 282 sys_userfaultfd (int flags)
+__NR_membarrier 283 sys_membarrier (int cmd, unsigned int flags, int cpu_id)
+__NR_rseq 293 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+__NR_open_tree 428 sys_open_tree (int dirfd, const char *pathname, unsigned int flags)
+__NR_move_mount 429 sys_move_mount (int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, int flags)
+__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags)
+__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux)
+__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags)
+__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
+__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
+__NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size)
+__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
+#__NR_dup2 ! sys_dup2 (int oldfd, int newfd)
+#__NR_rmdir ! sys_rmdir (const char *name)
+#__NR_unlink ! sys_unlink (char *pathname)
+#__NR_cacheflush ! sys_cacheflush (char *addr, int nbytes, int cache)
+#__NR_set_thread_area ! sys_set_thread_area (unsigned long *addr)
+#__NR_mkdir ! sys_mkdir (const char *name, int mode)
+#__NR_open ! sys_open (const char *filename, unsigned long flags, unsigned long mode)
diff --git a/compel/arch/loongarch64/scripts/compel-pack.lds.S b/compel/arch/loongarch64/scripts/compel-pack.lds.S
new file mode 100644
index 000000000..cfb7a2fb3
--- /dev/null
+++ b/compel/arch/loongarch64/scripts/compel-pack.lds.S
@@ -0,0 +1,32 @@
+OUTPUT_ARCH(loongarch)
+EXTERN(__export_parasite_head_start)
+
+SECTIONS
+{
+ .crblob 0x0 : { /* the single output section that is kept, placed at address 0 */
+ *(.head.text) /* .head.text input sections are laid out first */
+ ASSERT(DEFINED(__export_parasite_head_start),
+ "Symbol __export_parasite_head_start is missing");
+ *(.text*)
+ . = ALIGN(32); /* every following group starts on a 32-byte boundary */
+ *(.data*)
+ . = ALIGN(32);
+ *(.rodata*)
+ . = ALIGN(32);
+ *(.bss*)
+ . = ALIGN(32);
+ *(.got*)
+ . = ALIGN(32);
+ *(.toc*)
+ . = ALIGN(32);
+ } =0x00000000, /* fill expression: gaps are filled with zero bytes */
+
+ /DISCARD/ : { /* whatever was not collected into .crblob is dropped */
+ *(.debug*)
+ *(.comment*)
+ *(.note*)
+ *(.group*)
+ *(.eh_frame*)
+ *(*)
+ }
+}
diff --git a/compel/arch/loongarch64/src/lib/cpu.c b/compel/arch/loongarch64/src/lib/cpu.c
new file mode 100644
index 000000000..172b90e27
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/cpu.c
@@ -0,0 +1,41 @@
+#include
+#include
+
+#include "compel-cpu.h"
+#include "common/bitops.h"
+#include "common/compiler.h"
+#include "log.h"
+
+#undef LOG_PREFIX
+#define LOG_PREFIX "cpu: "
+
+static compel_cpuinfo_t rt_info;
+static bool rt_info_done = false;
+
+void compel_set_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
+{ /* empty stub: neither c nor feature is used */
+}
+
+void compel_clear_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
+{ /* empty stub: neither c nor feature is used */
+}
+
+int compel_test_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
+{
+ return 0; /* stub: every feature is reported as not present */
+}
+
+int compel_cpuid(compel_cpuinfo_t *c)
+{
+ return 0; /* stub: nothing is probed and c is left untouched */
+}
+
+bool compel_cpu_has_feature(unsigned int feature)
+{
+ if (!rt_info_done) { /* first call only: fill the file-local rt_info once */
+ compel_cpuid(&rt_info); /* return value is not checked */
+ rt_info_done = true;
+ }
+
+ return compel_test_cpu_cap(&rt_info, feature); /* int result converted to bool */
+}
diff --git a/compel/arch/loongarch64/src/lib/handle-elf-host.c b/compel/arch/loongarch64/src/lib/handle-elf-host.c
new file mode 100644
index 000000000..a605a5a45
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/handle-elf-host.c
@@ -0,0 +1,22 @@
+#include
+#include
+
+#include "handle-elf.h"
+#include "piegen.h"
+#include "log.h"
+
+static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = {
+ 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+extern int __handle_elf(void *mem, size_t size);
+
+int handle_binary(void *mem, size_t size)
+{
+ if (memcmp(mem, elf_ident_64_le, sizeof(elf_ident_64_le)) != 0) {
+ pr_err("Unsupported Elf format detected\n"); /* leading bytes differ from elf_ident_64_le */
+ return -EINVAL;
+ }
+ return __handle_elf(mem, size); /* identification matched: delegate to __handle_elf() */
+}
diff --git a/compel/arch/loongarch64/src/lib/handle-elf.c b/compel/arch/loongarch64/src/lib/handle-elf.c
new file mode 100644
index 000000000..a605a5a45
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/handle-elf.c
@@ -0,0 +1,22 @@
+#include
+#include
+
+#include "handle-elf.h"
+#include "piegen.h"
+#include "log.h"
+
+static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = {
+ 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+extern int __handle_elf(void *mem, size_t size);
+
+int handle_binary(void *mem, size_t size)
+{
+ if (memcmp(mem, elf_ident_64_le, sizeof(elf_ident_64_le)) != 0) {
+ pr_err("Unsupported Elf format detected\n"); /* leading bytes differ from elf_ident_64_le */
+ return -EINVAL;
+ }
+ return __handle_elf(mem, size); /* identification matched: delegate to __handle_elf() */
+}
diff --git a/compel/arch/loongarch64/src/lib/include/handle-elf.h b/compel/arch/loongarch64/src/lib/include/handle-elf.h
new file mode 100644
index 000000000..b0a66ef87
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/include/handle-elf.h
@@ -0,0 +1,8 @@
+#ifndef COMPEL_HANDLE_ELF_H__
+#define COMPEL_HANDLE_ELF_H__
+
+#include "elf64-types.h"
+
+#define arch_is_machine_supported(e_machine) ((e_machine) == EM_LOONGARCH) /* argument parenthesised so any expression may be passed */
+
+#endif /* COMPEL_HANDLE_ELF_H__ */
diff --git a/compel/arch/loongarch64/src/lib/include/syscall.h b/compel/arch/loongarch64/src/lib/include/syscall.h
new file mode 100644
index 000000000..ac3e2799a
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/include/syscall.h
@@ -0,0 +1,8 @@
+#ifndef __COMPEL_SYSCALL_H__
+#define __COMPEL_SYSCALL_H__
+
+#ifndef SIGSTKFLT
+#define SIGSTKFLT 16
+#endif
+
+#endif
diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/breakpoints.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/breakpoints.h
new file mode 100644
index 000000000..21eb1309f
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/breakpoints.h
@@ -0,0 +1,6 @@
+#ifndef __COMPEL_BREAKPOINTS_H__
+#define __COMPEL_BREAKPOINTS_H__
+#define ARCH_SI_TRAP TRAP_BRKPT
+extern int ptrace_set_breakpoint(pid_t pid, void *addr);
+extern int ptrace_flush_breakpoints(pid_t pid);
+#endif
diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/cpu.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/cpu.h
new file mode 100644
index 000000000..e568df789
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/cpu.h
@@ -0,0 +1,6 @@
+#ifndef __CR_ASM_CPU_H__
+#define __CR_ASM_CPU_H__
+
+typedef struct {
+} compel_cpuinfo_t;
+#endif /* __CR_ASM_CPU_H__ */
diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/fpu.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/fpu.h
new file mode 100644
index 000000000..7f476d541
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/fpu.h
@@ -0,0 +1,4 @@
+#ifndef __CR_ASM_FPU_H__
+#define __CR_ASM_FPU_H__
+
+#endif /* __CR_ASM_FPU_H__ */
diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/infect-types.h
new file mode 100644
index 000000000..0b047a5b0
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/infect-types.h
@@ -0,0 +1,67 @@
+#ifndef UAPI_COMPEL_ASM_TYPES_H__
+#define UAPI_COMPEL_ASM_TYPES_H__
+
+#include
+
+#define SIGMAX 64
+#define SIGMAX_OLD 31
+
+/*
+ * From the Linux kernel header arch/loongarch/include/uapi/asm/ptrace.h
+ *
+ * A thread LoongArch CPU context
+ *
+ * struct user_fp_state {
+ * uint64_t fpr[32];
+ * uint64_t fcc;
+ * uint32_t fcsr;
+ * };
+ *
+ * struct user_pt_regs {
+ * unsigned long regs[32];
+ * unsigned long csr_era;
+ * unsigned long csr_badv;
+ * unsigned long reserved[11];
+ * };
+ */
+
+struct user_gp_regs { /* NOTE(review): differs from the user_pt_regs quoted in the comment above (orig_a0, reserved[10]) - confirm against the kernel header */
+ uint64_t regs[32]; /* the 32 general-purpose registers */
+ uint64_t orig_a0; /* copied back into regs[4] when a syscall is restarted (see infect.c) */
+ uint64_t pc; /* read and written through REG_IP() / SET_REG_IP() */
+ uint64_t csr_badv;
+ uint64_t reserved[10];
+} __attribute__((aligned(8)));
+
+struct user_fp_regs { /* same field order and widths as the user_fp_state quoted in the comment above */
+ uint64_t regs[32]; /* the 32 floating-point registers (fpr[] in the quoted struct) */
+ uint64_t fcc;
+ uint32_t fcsr;
+};
+
+typedef struct user_gp_regs user_regs_struct_t;
+typedef struct user_fp_regs user_fpregs_struct_t;
+
+#define user_regs_native(regs) true
+
+#define __compel_arch_fetch_thread_area(tid, th) 0
+#define compel_arch_fetch_thread_area(tctl) 0
+#define compel_arch_get_tls_task(ctl, tls)
+#define compel_arch_get_tls_thread(tctl, tls)
+
+#define REG_RES(r) ((uint64_t)(r).regs[4]) /* regs[4]: where compel_syscall() reads the syscall result */
+#define REG_IP(r) ((uint64_t)(r).pc) /* instruction pointer */
+#define REG_SP(r) ((uint64_t)(r).regs[3]) /* stack pointer is taken from regs[3] */
+#define REG_SYSCALL_NR(r) ((uint64_t)(r).regs[11]) /* regs[11]: where compel_syscall() stores the syscall number */
+#define SET_REG_IP(r, val) ((r).pc = (val)) /* overwrites the instruction pointer */
+
+#define GPR_NUM 32
+#define FPR_NUM 32
+
+#define __NR(syscall, compat) \
+ ({ \
+ (void)compat; /* compat is accepted but has no effect on the result */ \
+ __NR_##syscall; /* value of the statement expression */ \
+ })
+
+#endif /* UAPI_COMPEL_ASM_TYPES_H__ */
diff --git a/compel/arch/loongarch64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/loongarch64/src/lib/include/uapi/asm/sigframe.h
new file mode 100644
index 000000000..fcb545a1d
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/include/uapi/asm/sigframe.h
@@ -0,0 +1,86 @@
+#ifndef UAPI_COMPEL_ASM_SIGFRAME_H__
+#define UAPI_COMPEL_ASM_SIGFRAME_H__
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+
+#define rt_sigcontext sigcontext
+/* sigcontext defined in usr/include/uapi/asm/sigcontext.h*/
+#include
+typedef __u32 u32;
+
+typedef struct sigcontext_t {
+ __u64 pc; /* exposed through RT_SIGFRAME_REGIP() */
+ __u64 regs[32]; /* general-purpose registers */
+ __u32 flags;
+ __u64 extcontext[0] __attribute__((__aligned__(16))); /* zero-length, 16-byte aligned tail; RT_SIGFRAME_FPU() puts a context_info_t here */
+} sigcontext_t;
+
+typedef struct context_info_t { /* header placed at the start of sigcontext_t.extcontext */
+ __u32 magic; /* RT_SIGFRAME_FPU() stores FPU_CTX_MAGIC here */
+ __u32 size; /* RT_SIGFRAME_FPU() stores header size plus payload size here */
+ __u64 padding;
+} context_info_t;
+
+#define FPU_CTX_MAGIC 0x46505501
+#define FPU_CTX_ALIGN 8
+typedef struct fpu_context_t { /* payload that follows a context_info_t header, see RT_SIGFRAME_FPU() */
+ __u64 regs[32];
+ __u64 fcc;
+ __u64 fcsr; /* NOTE(review): 64-bit here, while user_fp_regs.fcsr in infect-types.h is 32-bit - confirm against the kernel layout */
+} fpu_context_t;
+
+typedef struct ucontext {
+ unsigned long uc_flags;
+ struct ucontext *uc_link;
+ stack_t uc_stack; /* its address is handed to sigaltstack by arch_fetch_sas() in infect.c */
+ sigset_t uc_sigmask; /* accessed as k_rtsigset_t through RT_SIGFRAME_SIGMASK() */
+ __u8 __unused[1024 / 8 - sizeof(sigset_t)]; /* pads uc_sigmask out to 128 bytes in total */
+ sigcontext_t uc_mcontext; /* returned by RT_SIGFRAME_SIGCTX() */
+} ucontext;
+
+/* Copy from the kernel source arch/loongarch/kernel/signal.c */
+struct rt_sigframe {
+ rt_siginfo_t rs_info;
+ ucontext rs_uc;
+};
+
+#define RT_SIGFRAME_UC(rt_sigframe) (&(rt_sigframe->rs_uc))
+#define RT_SIGFRAME_SIGMASK(rt_sigframe) ((k_rtsigset_t *)&RT_SIGFRAME_UC(rt_sigframe)->uc_sigmask)
+#define RT_SIGFRAME_SIGCTX(rt_sigframe) (&(RT_SIGFRAME_UC(rt_sigframe)->uc_mcontext))
+#define RT_SIGFRAME_REGIP(rt_sigframe) ((long unsigned int)(RT_SIGFRAME_SIGCTX(rt_sigframe)->pc))
+#define RT_SIGFRAME_HAS_FPU(rt_sigframe) (1)
+
+#define RT_SIGFRAME_FPU(rt_sigframe) \
+ ({ \
+ context_info_t *ctx = (context_info_t *)RT_SIGFRAME_SIGCTX(rt_sigframe)->extcontext; \
+ ctx->magic = FPU_CTX_MAGIC; /* side effect: every use rewrites the header */ \
+ ctx->size = sizeof(context_info_t) + sizeof(fpu_context_t); \
+ (fpu_context_t *)((char *)ctx + sizeof(context_info_t)); /* result: FPU payload right behind the header */ \
+ })
+
+#define RT_SIGFRAME_OFFSET(rt_sigframe) 0
+
+/* clang-format off */
+#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe) \
+ asm volatile( \
+ "addi.d $sp, %0, 0 \n" /* sp <- new_sp */ \
+ "addi.d $a7, $zero, "__stringify(__NR_rt_sigreturn)" \n" /* a7 <- rt_sigreturn number */ \
+ "syscall 0" \
+ : \
+ :"r"(new_sp) \
+ : "$a7", "memory") /* the rt_sigframe argument is not referenced by the expansion */
+/* clang-format on */
+
+int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe);
+
+#define rt_sigframe_erase_sigset(sigframe) memset(RT_SIGFRAME_SIGMASK(sigframe), 0, sizeof(k_rtsigset_t))
+#define rt_sigframe_copy_sigset(sigframe, from) memcpy(RT_SIGFRAME_SIGMASK(sigframe), from, sizeof(k_rtsigset_t))
+
+#endif /* UAPI_COMPEL_ASM_SIGFRAME_H__ */
diff --git a/compel/arch/loongarch64/src/lib/infect.c b/compel/arch/loongarch64/src/lib/infect.c
new file mode 100644
index 000000000..190c39227
--- /dev/null
+++ b/compel/arch/loongarch64/src/lib/infect.c
@@ -0,0 +1,204 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include "errno.h"
+#include
+#include
+#include "common/err.h"
+#include "common/page.h"
+#include "asm/infect-types.h"
+#include "ptrace.h"
+#include "infect.h"
+#include "infect-priv.h"
+#include "log.h"
+#include "common/bug.h"
+
+/*
+ * Code blob handed to compel_execute_syscall() by compel_syscall(): a syscall
+ * instruction followed by a break; bytes are little-endian, as loongarch64 is.
+ */
+const char code_syscall[] = {
+ 0x00, 0x00, 0x2b, 0x00, /* syscall */
+ 0x00, 0x00, 0x2a, 0x00 /* break */
+};
+
+int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
+{ /* Copies general-purpose and FPU state into sigframe; always returns 0. */
+ sigcontext_t *sc;
+ fpu_context_t *fpu;
+
+ sc = RT_SIGFRAME_SIGCTX(sigframe);
+ memcpy(sc->regs, regs->regs, sizeof(regs->regs)); /* all 32 general-purpose registers */
+ sc->pc = regs->pc;
+
+ fpu = RT_SIGFRAME_FPU(sigframe); /* also stamps the extended-context header (magic, size) */
+ memcpy(fpu->regs, fpregs->regs, sizeof(fpregs->regs));
+ fpu->fcc = fpregs->fcc;
+ fpu->fcsr = fpregs->fcsr; /* 32-bit source value stored into the 64-bit frame field */
+ return 0;
+}
+
+int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe)
+{
+ return 0; /* stub: neither frame is touched */
+}
+
+int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
+ void *arg, __maybe_unused unsigned long flags)
+{ /* Fetches GP and FPU registers of pid and passes them to save(); returns 0 or the failing call's result. */
+ user_fpregs_struct_t tmp, *fpregs = ext_regs ? ext_regs : &tmp; /* local scratch when ext_regs is NULL */
+ struct iovec iov;
+ int ret;
+
+ pr_info("Dumping GP/FPU registers for %d\n", pid);
+
+ iov.iov_base = regs;
+ iov.iov_len = sizeof(user_regs_struct_t);
+ if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov))) {
+ pr_perror("Failed to obtain CPU registers for %d", pid);
+ goto err;
+ }
+
+ /*
+ * Refer to Linux kernel arch/loongarch/kernel/signal.c
+ */
+ if (regs->regs[0]) { /* non-zero regs[0]: apply the syscall-restart fixups below */
+ switch (regs->regs[4]) {
+ case -ERESTARTNOHAND:
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ regs->regs[4] = regs->orig_a0; /* restore the original first argument */
+ regs->pc -= 4; /* step back one 4-byte instruction */
+ break;
+ case -ERESTART_RESTARTBLOCK:
+ regs->regs[4] = regs->orig_a0;
+ regs->regs[11] = __NR_restart_syscall; /* restart goes through restart_syscall */
+ regs->pc -= 4;
+ break;
+ }
+ regs->regs[0] = 0; /* Don't deal with this again. */
+ }
+
+ iov.iov_base = fpregs;
+ iov.iov_len = sizeof(user_fpregs_struct_t);
+ if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov))) {
+ pr_perror("Failed to obtain FPU registers for %d", pid);
+ goto err;
+ }
+
+ ret = save(pid, arg, regs, fpregs);
+err:
+ return ret; /* report ptrace()/save() failures instead of always returning 0 */
+}
+
+int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
+{ /* Writes *ext_regs into pid's NT_PRFPREG register set; returns 0 on success, -1 on failure. */
+ struct iovec iov;
+
+ pr_info("Restoring GP/FPU registers for %d\n", pid);
+
+ iov.iov_base = ext_regs;
+ iov.iov_len = sizeof(*ext_regs);
+ if (ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov)) { /* only the FPU set is written here */
+ pr_perror("Failed to set FPU registers for %d", pid);
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Registers $4 ~ $11 represents arguments a0 ~ a7, especially a7 is
+ * used as syscall number. Returns compel_execute_syscall()'s result; *ret gets a0.
+ */
+int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6)
+{
+ int err;
+ user_regs_struct_t regs = ctl->orig.regs; /* work on a copy of the saved registers */
+
+ regs.regs[11] = (unsigned long)nr;
+ regs.regs[4] = arg1;
+ regs.regs[5] = arg2;
+ regs.regs[6] = arg3;
+ regs.regs[7] = arg4;
+ regs.regs[8] = arg5;
+ regs.regs[9] = arg6;
+ err = compel_execute_syscall(ctl, &regs, code_syscall); /* runs the code_syscall blob with these registers */
+
+ *ret = regs.regs[4]; /* stored even when err reports a failure */
+
+ return err;
+}
+
+void *remote_mmap(struct parasite_ctl *ctl, void *addr, size_t length, int prot, int flags, int fd, off_t offset)
+{ /* Executes mmap() through compel_syscall(); returns the mapped address, or NULL on failure. */
+ long map;
+ int err;
+
+ err = compel_syscall(ctl, __NR_mmap, &map, (unsigned long)addr, length, prot, flags, fd, offset); /* __NR_mmap takes a byte offset, not a page index */
+
+ if (err < 0 || IS_ERR_VALUE(map)) {
+ pr_err("remote mmap() failed: %s\n", strerror(-map));
+ return NULL;
+ }
+
+ return (void *)map;
+}
+
+/*
+ * regs must be inited when calling this function from original context
+ */
+void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs)
+{
+ regs->pc = new_ip; /* new instruction pointer */
+ if (stack) /* a NULL stack leaves regs[4] as it was */
+ regs->regs[4] = (unsigned long)stack; /* NOTE(review): goes to regs[4] while REG_SP() reads regs[3] - confirm this is intended */
+}
+
+bool arch_can_dump_task(struct parasite_ctl *ctl)
+{
+ return true; /* no check is performed; ctl is unused */
+}
+
+int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s)
+{ /* Issues sigaltstack(0, &s->rs_uc.uc_stack) through compel_syscall(). */
+ long ret;
+ int err;
+
+ err = compel_syscall(ctl, __NR_sigaltstack, &ret, 0, (unsigned long)&s->rs_uc.uc_stack, 0, 0, 0, 0);
+ return err ? err : ret; /* execution error first, otherwise the syscall's own result */
+}
+
+/*
+ * TODO: add feature (both breakpoint helpers below are stubs that return 0)
+ */
+int ptrace_set_breakpoint(pid_t pid, void *addr)
+{
+ return 0; /* stub: no breakpoint is installed */
+}
+
+int ptrace_flush_breakpoints(pid_t pid)
+{
+ return 0; /* stub: nothing to flush */
+}
+
+/*
+ * Refer to Linux kernel arch/loongarch/include/asm/processor.h
+ */
+#define TASK_SIZE32 (1UL << 31)
+#define TASK_SIZE64_MIN (1UL << 40)
+#define TASK_SIZE64_MAX (1UL << 48)
+
+unsigned long compel_task_size(void)
+{ /* Probes powers of two from TASK_SIZE64_MIN upwards and returns where probing stopped. */
+ unsigned long task_size;
+ for (task_size = TASK_SIZE64_MIN; task_size < TASK_SIZE64_MAX; task_size <<= 1)
+ if (munmap((void *)task_size, page_size())) /* first failing munmap() ends the probe */
+ break;
+ return task_size; /* TASK_SIZE64_MAX when no munmap() call failed */
+}
diff --git a/compel/arch/mips/plugins/include/asm/prologue.h b/compel/arch/mips/plugins/include/asm/prologue.h
new file mode 100644
index 000000000..c19ce54d7
--- /dev/null
+++ b/compel/arch/mips/plugins/include/asm/prologue.h
@@ -0,0 +1,35 @@
+#ifndef __ASM_PROLOGUE_H__
+#define __ASM_PROLOGUE_H__
+
+#ifndef __ASSEMBLY__
+
+#include
+#include
+#include
+
+#include
+
+#define sys_recv(sockfd, ubuf, size, flags) sys_recvfrom(sockfd, ubuf, size, flags, NULL, NULL)
+
+typedef struct prologue_init_args {
+ struct sockaddr_un ctl_sock_addr;
+ unsigned int ctl_sock_addr_len;
+
+ unsigned int arg_s;
+ void *arg_p;
+
+ void *sigframe;
+} prologue_init_args_t;
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Reserve enough space for sigframe.
+ *
+ * FIXME It is rather should be taken from sigframe header.
+ */
+#define PROLOGUE_SGFRAME_SIZE 4096
+
+#define PROLOGUE_INIT_ARGS_SIZE 1024
+
+#endif /* __ASM_PROLOGUE_H__ */
diff --git a/compel/arch/mips/plugins/include/asm/syscall-types.h b/compel/arch/mips/plugins/include/asm/syscall-types.h
new file mode 100644
index 000000000..bd7e8dfb3
--- /dev/null
+++ b/compel/arch/mips/plugins/include/asm/syscall-types.h
@@ -0,0 +1,36 @@
+#ifndef COMPEL_ARCH_SYSCALL_TYPES_H__
+#define COMPEL_ARCH_SYSCALL_TYPES_H__
+
+/* Types for sigaction, sigprocmask syscalls */
+typedef void rt_signalfn_t(int, siginfo_t *, void *);
+typedef rt_signalfn_t *rt_sighandler_t;
+
+typedef void rt_restorefn_t(void);
+typedef rt_restorefn_t *rt_sigrestore_t;
+
+#define SA_RESTORER 0x04000000
+
+/** refer to linux-3.10/arch/mips/include/uapi/asm/signal.h*/
+#define _KNSIG 128
+#define _NSIG_BPW 64
+
+#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW)
+
+/*
+ * Note: as k_rtsigset_t is the same size for 32-bit and 64-bit,
+ * sig defined as uint64_t rather than (unsigned long) - for the
+ * purpose if we ever going to support native 32-bit compilation.
+ */
+
+typedef struct {
+ uint64_t sig[_KNSIG_WORDS];
+} k_rtsigset_t;
+
+typedef struct {
+ rt_sighandler_t rt_sa_handler;
+ unsigned long rt_sa_flags;
+ rt_sigrestore_t rt_sa_restorer;
+ k_rtsigset_t rt_sa_mask;
+} rt_sigaction_t;
+
+#endif /* COMPEL_ARCH_SYSCALL_TYPES_H__ */
diff --git a/compel/arch/mips/plugins/include/features.h b/compel/arch/mips/plugins/include/features.h
new file mode 100644
index 000000000..0f35725fa
--- /dev/null
+++ b/compel/arch/mips/plugins/include/features.h
@@ -0,0 +1,6 @@
+#ifndef __COMPEL_ARCH_FEATURES_H
+#define __COMPEL_ARCH_FEATURES_H
+
+#define ARCH_HAS_MEMCPY
+
+#endif /* __COMPEL_ARCH_FEATURES_H */
diff --git a/compel/arch/mips/plugins/std/memcpy.S b/compel/arch/mips/plugins/std/memcpy.S
new file mode 100644
index 000000000..5d13a1590
--- /dev/null
+++ b/compel/arch/mips/plugins/std/memcpy.S
@@ -0,0 +1,22 @@
+
+#include "common/asm/linkage.h"
+
+ .section .head.text, "ax"
+ENTRY(memcpy) /* copies a2 bytes from a1 to a0, one byte per iteration */
+ .set noreorder
+ dadd v0,zero,a0 /* v0 <- original destination pointer */
+ daddiu t1,zero,0 /* t1 <- 0, count of bytes copied so far */
+loop:
+ beq t1,a2,exit /* done once t1 equals the requested length */
+ nop
+ lb t2,0(a1) /* load one byte from the source */
+ sb t2,0(a0) /* store it to the destination */
+ daddiu t1,t1,1
+ daddiu a0,a0,1
+ daddiu a1,a1,1
+ j loop
+ nop
+exit:
+ jr ra
+ nop
+END(memcpy)
diff --git a/compel/arch/mips/plugins/std/parasite-head.S b/compel/arch/mips/plugins/std/parasite-head.S
new file mode 100644
index 000000000..33d04db96
--- /dev/null
+++ b/compel/arch/mips/plugins/std/parasite-head.S
@@ -0,0 +1,14 @@
+
+#include "common/asm/linkage.h"
+
+ .section .head.text, "ax"
+ENTRY(__export_parasite_head_start)
+ .set push
+ .set noreorder
+ jal parasite_service /* call parasite_service */
+ nop
+ .byte 0x0d, 0x00, 0x00, 0x00 //break: raw instruction bytes placed right after the call
+ .set pop
+// .byte 0x40,0x01,0x00,0x00 //pause (disabled alternative)
+END(__export_parasite_head_start)
+
diff --git a/compel/arch/mips/plugins/std/syscalls/Makefile.syscalls b/compel/arch/mips/plugins/std/syscalls/Makefile.syscalls
new file mode 100644
index 000000000..ef75f9e95
--- /dev/null
+++ b/compel/arch/mips/plugins/std/syscalls/Makefile.syscalls
@@ -0,0 +1,117 @@
+std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/syscalls-64.o
+sys-proto-types := $(obj)/include/uapi/std/syscall-types.h
+sys-proto-generic := $(obj)/include/uapi/std/syscall.h
+sys-codes-generic := $(obj)/include/uapi/std/syscall-codes.h
+sys-codes = $(obj)/include/uapi/std/syscall-codes-$(1).h
+sys-proto = $(obj)/include/uapi/std/syscall-$(1).h
+sys-def = $(PLUGIN_ARCH_DIR)/std/syscalls/syscall_$(1).tbl
+sys-asm = $(PLUGIN_ARCH_DIR)/std/syscalls-$(1).S
+sys-asm-common-name = std/syscalls/syscall-common-mips-$(1).S
+sys-asm-common = $(PLUGIN_ARCH_DIR)/$(sys-asm-common-name)
+sys-asm-types := $(obj)/include/uapi/std/asm/syscall-types.h
+sys-exec-tbl = $(PLUGIN_ARCH_DIR)/std/sys-exec-tbl-$(1).c
+
+sys-bits := 64
+
+AV := $$$$
+
+define gen-rule-sys-codes
+$(sys-codes): $(sys-def) $(sys-proto-types)
+ $(call msg-gen, $$@)
+ $(Q) echo "/* Autogenerated, don't edit */" > $$@
+ $(Q) echo "#ifndef ASM_SYSCALL_CODES_H_$(1)__" >> $$@
+ $(Q) echo "#define ASM_SYSCALL_CODES_H_$(1)__" >> $$@
+ $(Q) cat $$< | awk '/^__NR/{SYSN=$(AV)1; \
+ sub("^__NR", "SYS", SYSN); \
+ print "\n#ifndef ", $(AV)1; \
+ print "#define", $(AV)1, $(AV)2; \
+ print "#endif"; \
+ print "\n#ifndef ", SYSN; \
+ print "#define ", SYSN, $(AV)1; \
+ print "#endif";}' >> $$@
+ $(Q) echo "#endif /* ASM_SYSCALL_CODES_H_$(1)__ */" >> $$@
+endef
+
+define gen-rule-sys-proto
+$(sys-proto): $(sys-def) $(sys-proto-types)
+ $(call msg-gen, $$@)
+ $(Q) echo "/* Autogenerated, don't edit */" > $$@
+ $(Q) echo "#ifndef ASM_SYSCALL_PROTO_H_$(1)__" >> $$@
+ $(Q) echo "#define ASM_SYSCALL_PROTO_H_$(1)__" >> $$@
+ $(Q) echo '#include ' >> $$@
+ $(Q) echo '#include ' >> $$@
+ifeq ($(1),32)
+ $(Q) echo '#include "asm/syscall32.h"' >> $$@
+endif
+ $(Q) cat $$< | awk '/^__NR/{print "extern long", $(AV)3, \
+ substr($(AV)0, index($(AV)0,$(AV)4)), ";"}' >> $$@
+ $(Q) echo "#endif /* ASM_SYSCALL_PROTO_H_$(1)__ */" >> $$@
+endef
+
+define gen-rule-sys-asm
+$(sys-asm): $(sys-def) $(sys-asm-common) $(sys-codes) $(sys-proto) $(sys-proto-types)
+ $(call msg-gen, $$@)
+ $(Q) echo "/* Autogenerated, don't edit */" > $$@
+ $(Q) echo '#include ' >> $$@
+ $(Q) echo '#include "$(sys-asm-common-name)"' >> $$@
+ $(Q) cat $$< | awk '/^__NR/{print "SYSCALL(", $(AV)3, ",", $(AV)2, ")"}' >> $$@
+endef
+
+define gen-rule-sys-exec-tbl
+$(sys-exec-tbl): $(sys-def) $(sys-codes) $(sys-proto) $(sys-proto-generic) $(sys-proto-types)
+ $(call msg-gen, $$@)
+ $(Q) echo "/* Autogenerated, don't edit */" > $$@
+ $(Q) cat $$< | awk '/^__NR/{print \
+ "SYSCALL(", substr($(AV)3, 5), ",", $(AV)2, ")"}' >> $$@
+endef
+
+$(sys-codes-generic): $(sys-proto-types)
+ $(call msg-gen, $@)
+ $(Q) echo "/* Autogenerated, don't edit */" > $@
+ $(Q) echo "#ifndef __ASM_CR_SYSCALL_CODES_H__" >> $@
+ $(Q) echo "#define __ASM_CR_SYSCALL_CODES_H__" >> $@
+ $(Q) echo '#include ' >> $@
+ $(Q) cat $< | awk '/^__NR/{NR32=$$1; \
+ sub("^__NR", "__NR32", NR32); \
+ print "\n#ifndef ", NR32; \
+ print "#define ", NR32, $$2; \
+ print "#endif";}' >> $@
+ $(Q) echo "#endif /* __ASM_CR_SYSCALL_CODES_H__ */" >> $@
+mrproper-y += $(sys-codes-generic)
+
+$(sys-proto-generic): $(strip $(call map,sys-proto,$(sys-bits))) $(sys-proto-types)
+ $(call msg-gen, $@)
+ $(Q) echo "/* Autogenerated, don't edit */" > $@
+ $(Q) echo "#ifndef __ASM_CR_SYSCALL_PROTO_H__" >> $@
+ $(Q) echo "#define __ASM_CR_SYSCALL_PROTO_H__" >> $@
+ $(Q) echo "" >> $@
+ $(Q) echo '#include ' >> $@
+ $(Q) echo "" >> $@
+ $(Q) echo "#endif /* __ASM_CR_SYSCALL_PROTO_H__ */" >> $@
+mrproper-y += $(sys-proto-generic)
+
+define gen-rule-sys-exec-tbl
+$(sys-exec-tbl): $(sys-def) $(sys-codes) $(sys-proto) $(sys-proto-generic) $(sys-proto-types)
+ $(call msg-gen, $$@)
+ $(Q) echo "/* Autogenerated, don't edit */" > $$@
+ $(Q) cat $$< | awk '/^__NR/{print \
+ "SYSCALL(", substr($(AV)3, 5), ",", $(AV)2, ")"}' >> $$@
+endef
+
+$(eval $(call map,gen-rule-sys-codes,$(sys-bits)))
+$(eval $(call map,gen-rule-sys-proto,$(sys-bits)))
+$(eval $(call map,gen-rule-sys-asm,$(sys-bits)))
+$(eval $(call map,gen-rule-sys-exec-tbl,$(sys-bits)))
+
+$(sys-asm-types): $(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h
+ $(call msg-gen, $@)
+ $(Q) ln -s ../../../../../../$(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h $(sys-asm-types)
+
+std-headers-deps += $(call sys-codes,$(sys-bits))
+std-headers-deps += $(call sys-proto,$(sys-bits))
+std-headers-deps += $(call sys-asm,$(sys-bits))
+std-headers-deps += $(call sys-exec-tbl,$(sys-bits))
+std-headers-deps += $(sys-codes-generic)
+std-headers-deps += $(sys-proto-generic)
+std-headers-deps += $(sys-asm-types)
+mrproper-y += $(std-headers-deps)
diff --git a/compel/arch/mips/plugins/std/syscalls/syscall-common-mips-64.S b/compel/arch/mips/plugins/std/syscalls/syscall-common-mips-64.S
new file mode 100644
index 000000000..3478488da
--- /dev/null
+++ b/compel/arch/mips/plugins/std/syscalls/syscall-common-mips-64.S
@@ -0,0 +1,12 @@
+#include "common/asm/linkage.h"
+
+/*
+ * SYSCALL(name, opcode) - emit a syscall stub called @name.
+ *
+ * The stub loads @opcode into v0, executes the syscall instruction and
+ * returns through ra. The nop that follows jr presumably occupies the
+ * MIPS branch delay slot.
+ */
+#define SYSCALL(name, opcode) \
+ ENTRY(name); \
+ li v0, opcode; \
+ syscall; \
+ jr ra; \
+ nop; \
+ END(name)
+
+/* __cr_restore_rt: the symbol is declared but no instructions are emitted between ENTRY and END. */
+ENTRY(__cr_restore_rt)
+END(__cr_restore_rt)
diff --git a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl
new file mode 100644
index 000000000..ad3d44634
--- /dev/null
+++ b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl
@@ -0,0 +1,123 @@
+#
+# System call table. Please make sure the table contains only the syscalls
+# that are really used somewhere in the project.
+# Numbers are taken from kernel/linux-3.10.84/arch/mips/include/uapi/asm/unistd.h; Linux 64-bit syscalls are in the range from 5000 to 5999.
+#
+# __NR_name code name arguments
+# -------------------------------------------------------------------------------------------------------------------------------------------------------------
+__NR_read 5000 sys_read (int fd, void *buf, unsigned long count)
+__NR_write 5001 sys_write (int fd, const void *buf, unsigned long count)
+__NR_open 5002 sys_open (const char *filename, unsigned long flags, unsigned long mode)
+__NR_close 5003 sys_close (int fd)
+__NR_lseek 5008 sys_lseek (int fd, unsigned long offset, unsigned long origin)
+__NR_mmap 5009 sys_mmap (void *addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long offset)
+__NR_mprotect 5010 sys_mprotect (const void *addr, unsigned long len, unsigned long prot)
+__NR_munmap 5011 sys_munmap (void *addr, unsigned long len)
+__NR_brk 5012 sys_brk (void *addr)
+__NR_rt_sigaction 5013 sys_sigaction (int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact, size_t sigsetsize)
+__NR_rt_sigprocmask 5014 sys_sigprocmask (int how, k_rtsigset_t *set, k_rtsigset_t *old, size_t sigsetsize)
+__NR_rt_sigreturn 5211 sys_rt_sigreturn (void)
+__NR_ioctl 5015 sys_ioctl (unsigned int fd, unsigned int cmd, unsigned long arg)
+__NR_pread64 5016 sys_pread (unsigned int fd, char *buf, size_t count, loff_t pos)
+__NR_mremap 5024 sys_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr)
+__NR_mincore 5026 sys_mincore (void *addr, unsigned long size, unsigned char *vec)
+__NR_madvise 5027 sys_madvise (unsigned long start, size_t len, int behavior)
+__NR_shmat 5029 sys_shmat (int shmid, void *shmaddr, int shmflag)
+__NR_dup2 5032 sys_dup2 (int oldfd, int newfd)
+__NR_nanosleep 5034 sys_nanosleep (struct timespec *req, struct timespec *rem)
+__NR_getitimer 5035 sys_getitimer (int which, const struct itimerval *val)
+__NR_setitimer 5036 sys_setitimer (int which, const struct itimerval *val, struct itimerval *old)
+__NR_getpid 5038 sys_getpid (void)
+__NR_socket 5040 sys_socket (int domain, int type, int protocol)
+__NR_connect 5041 sys_connect (int sockfd, struct sockaddr *addr, int addrlen)
+__NR_sendto 5043 sys_sendto (int sockfd, void *buff, size_t len, unsigned int flags, struct sockaddr *addr, int addr_len)
+__NR_recvfrom 5044 sys_recvfrom (int sockfd, void *ubuf, size_t size, unsigned int flags, struct sockaddr *addr, int *addr_len)
+__NR_sendmsg 5045 sys_sendmsg (int sockfd, const struct msghdr *msg, int flags)
+__NR_recvmsg 5046 sys_recvmsg (int sockfd, struct msghdr *msg, int flags)
+__NR_shutdown 5047 sys_shutdown (int sockfd, int how)
+__NR_bind 5048 sys_bind (int sockfd, const struct sockaddr *addr, int addrlen)
+__NR_setsockopt 5053 sys_setsockopt (int sockfd, int level, int optname, const void *optval, socklen_t optlen)
+__NR_getsockopt 5054 sys_getsockopt (int sockfd, int level, int optname, const void *optval, socklen_t *optlen)
+__NR_clone 5055 sys_clone (unsigned long flags, void *child_stack, void *parent_tid, unsigned long newtls, void *child_tid)
+__NR_exit 5058 sys_exit (unsigned long error_code)
+__NR_wait4 5059 sys_wait4 (int pid, int *status, int options, struct rusage *ru)
+__NR_kill 5060 sys_kill (long pid, int sig)
+__NR_fcntl 5070 sys_fcntl (int fd, int type, long arg)
+__NR_flock 5071 sys_flock (int fd, unsigned long cmd)
+__NR_mkdir 5081 sys_mkdir (const char *name, int mode)
+__NR_rmdir 5082 sys_rmdir (const char *name)
+__NR_unlink 5085 sys_unlink (char *pathname)
+__NR_umask 5093 sys_umask (int mask)
+__NR_gettimeofday 5094 sys_gettimeofday (struct timeval *tv, struct timezone *tz)
+__NR_ptrace 5099 sys_ptrace (long request, pid_t pid, void *addr, void *data)
+__NR_getgroups 5113 sys_getgroups (int gsize, unsigned int *groups)
+__NR_setgroups 5114 sys_setgroups (int gsize, unsigned int *groups)
+__NR_setresuid 5115 sys_setresuid (int uid, int euid, int suid)
+__NR_getresuid 5116 sys_getresuid (int *uid, int *euid, int *suid)
+__NR_setresgid 5117 sys_setresgid (int gid, int egid, int sgid)
+__NR_getresgid 5118 sys_getresgid (int *gid, int *egid, int *sgid)
+__NR_getpgid 5119 sys_getpgid (pid_t pid)
+__NR_setfsuid 5120 sys_setfsuid (int fsuid)
+__NR_setfsgid 5121 sys_setfsgid (int fsgid)
+__NR_getsid 5122 sys_getsid (void)
+__NR_capget 5123 sys_capget (struct cap_header *h, struct cap_data *d)
+__NR_capset 5124 sys_capset (struct cap_header *h, struct cap_data *d)
+__NR_rt_sigqueueinfo 5127 sys_rt_sigqueueinfo (pid_t pid, int sig, siginfo_t *info)
+__NR_sigaltstack 5129 sys_sigaltstack (const void *uss, void *uoss)
+__NR_personality 5132 sys_personality (unsigned int personality)
+__NR_setpriority 5138 sys_setpriority (int which, int who, int nice)
+__NR_sched_setscheduler 5141 sys_sched_setscheduler (int pid, int policy, struct sched_param *p)
+__NR_prctl 5153 sys_prctl (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5)
+__NR_setrlimit 5155 sys_setrlimit (int resource, struct krlimit *rlim)
+__NR_mount 5160 sys_mount (char *dev_name, char *dir_name, char *type, unsigned long flags, void *data)
+__NR_umount2 5161 sys_umount2 (char *name, int flags)
+__NR_gettid 5178 sys_gettid (void)
+__NR_futex 5194 sys_futex (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3)
+__NR_cacheflush 5197 sys_cacheflush (char *addr, int nbytes, int cache)
+__NR_io_setup 5200 sys_io_setup (unsigned nr_events, aio_context_t *ctx)
+__NR_io_getevents 5202 sys_io_getevents (aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo)
+__NR_io_submit 5203 sys_io_submit (aio_context_t ctx, long nr, struct iocb **iocbpp)
+__NR_set_tid_address 5212 sys_set_tid_address (int *tid_addr)
+__NR_restart_syscall 5213 sys_restart_syscall (void)
+__NR_sys_timer_create 5216 sys_timer_create (clockid_t which_clock, struct sigevent *timer_event_spec, kernel_timer_t *created_timer_id)
+__NR_sys_timer_settime 5217 sys_timer_settime (kernel_timer_t timer_id, int flags, const struct itimerspec *new_setting, struct itimerspec *old_setting)
+__NR_sys_timer_gettime 5218 sys_timer_gettime (int timer_id, const struct itimerspec *setting)
+__NR_sys_timer_getoverrun 5219 sys_timer_getoverrun (int timer_id)
+__NR_sys_timer_delete 5220 sys_timer_delete (kernel_timer_t timer_id)
+__NR_clock_gettime 5222 sys_clock_gettime (clockid_t which_clock, struct timespec *tp)
+__NR_exit_group 5205 sys_exit_group (int error_code)
+__NR_set_thread_area 5242 sys_set_thread_area (unsigned long *addr)
+__NR_openat 5247 sys_openat (int dfd, const char *filename, int flags, int mode)
+__NR_waitid 5237 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
+__NR_readlinkat 5257 sys_readlinkat (int fd, const char *path, char *buf, int bufsize)
+__NR_ppoll 5261 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize)
+__NR_set_robust_list 5268 sys_set_robust_list (struct robust_list_head *head, size_t len)
+__NR_get_robust_list 5269 sys_get_robust_list (int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
+__NR_fallocate 5279 sys_fallocate (int fd, int mode, loff_t offset, loff_t len)
+__NR_seccomp 5312 sys_seccomp (unsigned int op, unsigned int flags, const char *uargs)
+__NR_vmsplice 5266 sys_vmsplice (int fd, const struct iovec *iov, unsigned long nr_segs, unsigned int flags)
+__NR_timerfd_settime 5282 sys_timerfd_settime (int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr)
+__NR_signalfd4 5283 sys_signalfd (int fd, k_rtsigset_t *mask, size_t sizemask, int flags)
+__NR_preadv 5289 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h)
+__NR_rt_tgsigqueueinfo 5291 sys_rt_tgsigqueueinfo (pid_t tgid, pid_t pid, int sig, siginfo_t *info)
+__NR_fanotify_init 5295 sys_fanotify_init (unsigned int flags, unsigned int event_f_flags)
+__NR_fanotify_mark 5296 sys_fanotify_mark (int fanotify_fd, unsigned int flags, uint64_t mask, int dfd, const char *pathname)
+__NR_open_by_handle_at 5299 sys_open_by_handle_at (int mountdirfd, struct file_handle *handle, int flags)
+__NR_setns 5303 sys_setns (int fd, int nstype)
+__NR_kcmp 5306 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
+__NR_memfd_create 5314 sys_memfd_create (const char *name, unsigned int flags)
+__NR_userfaultfd 5317 sys_userfaultfd (int flags)
+
+##TODO for kernel
+__NR_open_tree 5428 sys_open_tree (int dirfd, const char *pathname, unsigned int flags)
+__NR_move_mount 5429 sys_move_mount (int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, int flags)
+__NR_fsopen 5430 sys_fsopen (char *fsname, unsigned int flags)
+__NR_fsconfig 5431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux)
+__NR_fsmount 5432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags)
+__NR_clone3 5435 sys_clone3 (struct clone_args *uargs, size_t size)
+__NR_close_range 5436 sys_close_range (unsigned int fd, unsigned int max_fd, unsigned int flags)
+__NR_pidfd_open 5434 sys_pidfd_open (pid_t pid, unsigned int flags)
+__NR_openat2 5437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size)
+__NR_pidfd_getfd 5438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
+__NR_rseq 5327 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+__NR_membarrier 5318 sys_membarrier (int cmd, unsigned int flags, int cpu_id)
diff --git a/compel/arch/mips/scripts/compel-pack-compat.lds.S b/compel/arch/mips/scripts/compel-pack-compat.lds.S
new file mode 100644
index 000000000..fe54847aa
--- /dev/null
+++ b/compel/arch/mips/scripts/compel-pack-compat.lds.S
@@ -0,0 +1,3 @@
+/*
+ * Linker script for compat PIEs on mips. The unconditional ASSERT(0, ...)
+ * below makes any link that uses this script fail with the given message.
+ */
+OUTPUT_ARCH(mips)
+EXTERN(__export_parasite_head_start)
+ASSERT(0,"Compatible PIEs are unsupported on mips")
diff --git a/compel/arch/mips/scripts/compel-pack.lds.S b/compel/arch/mips/scripts/compel-pack.lds.S
new file mode 100644
index 000000000..370cac68f
--- /dev/null
+++ b/compel/arch/mips/scripts/compel-pack.lds.S
@@ -0,0 +1,33 @@
+/*
+ * Linker script for mips parasite blobs. Everything that is kept is merged
+ * into a single .text output section with .head.text placed first; the link
+ * fails if __export_parasite_head_start is not defined.
+ */
+OUTPUT_ARCH(mips)
+EXTERN(__export_parasite_head_start)
+
+SECTIONS
+{
+ .text : {
+ *(.head.text)
+ ASSERT(DEFINED(__export_parasite_head_start),
+ "Symbol __export_parasite_head_start is missing");
+ *(.text*)
+ *(.compel.exit)
+ *(.compel.init)
+ /* read-only data and the GOT */
+ *(.rodata*)
+ *(.got*)
+ /* writable data, zero-initialized data and the TOC */
+ *(.data*)
+ *(.bss*)
+ *(.sbss*)
+ *(.toc*)
+ }
+
+ /DISCARD/ : { /* input sections dropped from the output */
+ *(.debug*)
+ *(.pdr)
+ *(.comment*)
+ *(.note*)
+ *(.group*)
+ *(.eh_frame*)
+ *(.MIPS.options)
+ *(.gnu.attributes)
+ }
+}
diff --git a/compel/arch/mips/src/lib/cpu.c b/compel/arch/mips/src/lib/cpu.c
new file mode 100644
index 000000000..172b90e27
--- /dev/null
+++ b/compel/arch/mips/src/lib/cpu.c
@@ -0,0 +1,41 @@
+#include
+#include
+
+#include "compel-cpu.h"
+#include "common/bitops.h"
+#include "common/compiler.h"
+#include "log.h"
+
+#undef LOG_PREFIX
+#define LOG_PREFIX "cpu: "
+
+/* Cached CPU info and the flag telling whether compel_cpuid() has already filled it. */
+static compel_cpuinfo_t rt_info;
+static bool rt_info_done = false;
+
+/* No-op on mips: the body is empty, @c and @feature are ignored. */
+void compel_set_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
+{
+}
+
+/* No-op on mips: the body is empty, @c and @feature are ignored. */
+void compel_clear_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
+{
+}
+
+/* Always returns 0, whatever @c and @feature are. */
+int compel_test_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
+{
+ return 0;
+}
+
+/* Returns 0 without reading or writing @c. */
+int compel_cpuid(compel_cpuinfo_t *c)
+{
+ return 0;
+}
+
+/*
+ * compel_cpu_has_feature - query @feature against the cached CPU info.
+ *
+ * The cache (rt_info) is filled through compel_cpuid() on the first call
+ * only; every call then returns compel_test_cpu_cap() for @feature.
+ */
+bool compel_cpu_has_feature(unsigned int feature)
+{
+	/* Fast path: the cache has already been populated. */
+	if (rt_info_done)
+		return compel_test_cpu_cap(&rt_info, feature);
+
+	compel_cpuid(&rt_info);
+	rt_info_done = true;
+
+	return compel_test_cpu_cap(&rt_info, feature);
+}
diff --git a/compel/arch/mips/src/lib/handle-elf-host.c b/compel/arch/mips/src/lib/handle-elf-host.c
new file mode 120000
index 000000000..fe4611886
--- /dev/null
+++ b/compel/arch/mips/src/lib/handle-elf-host.c
@@ -0,0 +1 @@
+handle-elf.c
\ No newline at end of file
diff --git a/compel/arch/mips/src/lib/handle-elf.c b/compel/arch/mips/src/lib/handle-elf.c
new file mode 100644
index 000000000..e086761c2
--- /dev/null
+++ b/compel/arch/mips/src/lib/handle-elf.c
@@ -0,0 +1,35 @@
+#include
+#include
+
+#include "handle-elf.h"
+#include "piegen.h"
+#include "log.h"
+
+extern int __handle_elf(void *mem, size_t size);
+
+/*
+ * handle_binary - validate an in-memory ELF image and hand it to __handle_elf().
+ * @mem:  start of the image
+ * @size: size of the image in bytes
+ *
+ * Only 64-bit little-endian ELF objects are accepted. A non-zero
+ * EI_ABIVERSION is reported with a warning but does not reject the image.
+ *
+ * Returns -EINVAL if @mem is NULL, the image is too short to hold an ELF
+ * header, the magic is wrong, or the class/data encoding is unsupported;
+ * otherwise the return value of __handle_elf().
+ */
+int handle_binary(void *mem, size_t size)
+{
+	Elf64_Ehdr *ehdr = (Elf64_Ehdr *)mem;
+
+	/* e_ident is read below, so the buffer must hold a complete ELF header. */
+	if (!mem || size < sizeof(*ehdr)) {
+		pr_err("Missing or truncated ELF image (%zu bytes)\n", size);
+		return -EINVAL;
+	}
+
+	/* check ELF magic */
+	if (ehdr->e_ident[EI_MAG0] != ELFMAG0 ||
+	    ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
+	    ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
+	    ehdr->e_ident[EI_MAG3] != ELFMAG3) {
+		pr_err("Invalid ELF magic\n");
+		return -EINVAL;
+	}
+
+	/* check ELF class and data encoding */
+	if (ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
+	    ehdr->e_ident[EI_DATA] != ELFDATA2LSB) {
+		pr_err("Unsupported ELF class or data encoding\n");
+		return -EINVAL;
+	}
+
+	/* An unexpected ABI version is only worth a warning. */
+	if (ehdr->e_ident[EI_ABIVERSION] != 0) {
+		pr_warn("Unusual ABI version: %d\n", ehdr->e_ident[EI_ABIVERSION]);
+	}
+
+	return __handle_elf(mem, size);
+}
diff --git a/compel/arch/mips/src/lib/include/handle-elf.h b/compel/arch/mips/src/lib/include/handle-elf.h
new file mode 100644
index 000000000..aa650a2f6
--- /dev/null
+++ b/compel/arch/mips/src/lib/include/handle-elf.h
@@ -0,0 +1,8 @@
+#ifndef COMPEL_HANDLE_ELF_H__
+#define COMPEL_HANDLE_ELF_H__
+
+#include "elf64-types.h"
+
+/*
+ * True when @e_machine is EM_MIPS. The argument is parenthesized so that
+ * an expression such as "a ? b : c" compares as a whole.
+ */
+#define arch_is_machine_supported(e_machine) ((e_machine) == EM_MIPS)
+
+#endif /* COMPEL_HANDLE_ELF_H__ */
diff --git a/compel/arch/mips/src/lib/include/ldsodefs.h b/compel/arch/mips/src/lib/include/ldsodefs.h
new file mode 100644
index 000000000..97e79755d
--- /dev/null
+++ b/compel/arch/mips/src/lib/include/ldsodefs.h
@@ -0,0 +1,130 @@
+/*
+ * Run-time dynamic linker data structures for loaded ELF shared objects.
+ * Copyright (C) 2000-2014 Free Software Foundation, Inc.
+ * This file is part of the GNU C Library.
+ *
+ * The GNU C Library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * The GNU C Library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with the GNU C Library. If not, see
+ * .
+ */
+
+#ifndef _MIPS_LDSODEFS_H
+#define _MIPS_LDSODEFS_H 1
+
+#include
+
+struct La_mips_32_regs;
+struct La_mips_32_retval;
+struct La_mips_64_regs;
+struct La_mips_64_retval;
+
+/*
+ * Function-pointer members for the PLT-enter hooks, one per MIPS ABI
+ * (o32, n32, n64). The o32 and n32 variants use the Elf32 types, n64 the
+ * Elf64 types; o32 takes La_mips_32_regs, the other two La_mips_64_regs.
+ */
+#define ARCH_PLTENTER_MEMBERS \
+ Elf32_Addr (*mips_o32_gnu_pltenter)(Elf32_Sym *, unsigned int, uintptr_t *, uintptr_t *, \
+ struct La_mips_32_regs *, unsigned int *, const char *name, \
+ long int *framesizep); \
+ Elf32_Addr (*mips_n32_gnu_pltenter)(Elf32_Sym *, unsigned int, uintptr_t *, uintptr_t *, \
+ struct La_mips_64_regs *, unsigned int *, const char *name, \
+ long int *framesizep); \
+ Elf64_Addr (*mips_n64_gnu_pltenter)(Elf64_Sym *, unsigned int, uintptr_t *, uintptr_t *, \
+ struct La_mips_64_regs *, unsigned int *, const char *name, \
+ long int *framesizep);
+
+/*
+ * Function-pointer members for the PLT-exit hooks, one per MIPS ABI
+ * (o32, n32, n64). Each receives the const register set and a writable
+ * return-value structure, and returns unsigned int.
+ */
+#define ARCH_PLTEXIT_MEMBERS \
+ unsigned int (*mips_o32_gnu_pltexit)(Elf32_Sym *, unsigned int, uintptr_t *, uintptr_t *, \
+ const struct La_mips_32_regs *, struct La_mips_32_retval *, \
+ const char *); \
+ unsigned int (*mips_n32_gnu_pltexit)(Elf32_Sym *, unsigned int, uintptr_t *, uintptr_t *, \
+ const struct La_mips_64_regs *, struct La_mips_64_retval *, \
+ const char *); \
+ unsigned int (*mips_n64_gnu_pltexit)(Elf64_Sym *, unsigned int, uintptr_t *, uintptr_t *, \
+ const struct La_mips_64_regs *, struct La_mips_64_retval *, \
+ const char *);
+
+/* The MIPS ABI specifies that the dynamic section has to be read-only. */
+
+/*
+ * The 64-bit MIPS ELF ABI uses an unusual reloc format. Each
+ * relocation entry specifies up to three actual relocations, all at
+ * the same address. The first relocation which requires a symbol
+ * uses the symbol in the r_sym field. The second relocation which
+ * requires a symbol uses the symbol in the r_ssym field. If all
+ * three relocations require a symbol, the third one uses a zero
+ * value.
+ *
+ * We define these structures in internal headers because we're not
+ * sure we want to make them part of the ABI yet. Eventually, some of
+ * this may move into elf/elf.h.
+ */
+
+/*
+ * Field view of the r_info word of a 64-bit MIPS relocation entry: a
+ * 32-bit symbol index followed by four single-byte fields.
+ */
+
+typedef struct {
+ Elf32_Word r_sym; /* Symbol index */
+ unsigned char r_ssym; /* Special symbol for 2nd relocation */
+ unsigned char r_type3; /* 3rd relocation type */
+ unsigned char r_type2; /* 2nd relocation type */
+ unsigned char r_type1; /* 1st relocation type */
+} _Elf64_Mips_R_Info;
+
+/* r_info viewed either as one Elf64_Xword or as its individual fields. */
+typedef union {
+ Elf64_Xword r_info_number;
+ _Elf64_Mips_R_Info r_info_fields;
+} _Elf64_Mips_R_Info_union;
+
+/* MIPS 64-bit relocation entry without an addend. */
+typedef struct {
+ Elf64_Addr r_offset; /* Address */
+ _Elf64_Mips_R_Info_union r_info; /* Relocation type and symbol index */
+} Elf64_Mips_Rel;
+
+/* MIPS 64-bit relocation entry with an explicit addend. */
+typedef struct {
+ Elf64_Addr r_offset; /* Address */
+ _Elf64_Mips_R_Info_union r_info; /* Relocation type and symbol index */
+ Elf64_Sxword r_addend; /* Addend */
+} Elf64_Mips_Rela;
+
+/* Extract the symbol index (r_sym) from an r_info value by casting it to the union. */
+#define ELF64_MIPS_R_SYM(i) ((__extension__(_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_sym)
+
+/*
+ * Pack the relocation types and the special symbol of an r_info value into
+ * one word: r_type1 in bits 0-7, r_type2 in bits 8-15, r_type3 in bits
+ * 16-23 and r_ssym in bits 24-31. Every cast to the union is marked
+ * __extension__, matching ELF64_MIPS_R_SYM and ELF64_MIPS_R_INFO.
+ */
+#define ELF64_MIPS_R_TYPE(i) \
+	((__extension__(_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_type1 | \
+	 ((Elf32_Word)(__extension__(_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_type2 << 8) | \
+	 ((Elf32_Word)(__extension__(_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_type3 << 16) | \
+	 ((Elf32_Word)(__extension__(_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_ssym << 24))
+
+/*
+ * Build an r_info number from a symbol index and a packed type word: @type
+ * is split with ELF64_MIPS_R_SSYM/TYPE3/TYPE2/TYPE1 into the fields of a
+ * compound literal, which is then read back through the union as a number.
+ */
+#define ELF64_MIPS_R_INFO(sym, type) \
+ (__extension__(_Elf64_Mips_R_Info_union)( \
+ __extension__(_Elf64_Mips_R_Info){ (sym), ELF64_MIPS_R_SSYM(type), ELF64_MIPS_R_TYPE3(type), \
+ ELF64_MIPS_R_TYPE2(type), ELF64_MIPS_R_TYPE1(type) }) \
+ .r_info_number)
+
+/*
+ * These macros decompose the value returned by ELF64_MIPS_R_TYPE into its
+ * four 8-bit fields, and ELF64_MIPS_R_TYPEENC composes them back into a
+ * value that can be used as the type argument of ELF64_MIPS_R_INFO.
+ * Bit layout: type1 0-7, type2 8-15, type3 16-23, ssym 24-31.
+ */
+#define ELF64_MIPS_R_SSYM(i) (((i) >> 24) & 0xff)
+#define ELF64_MIPS_R_TYPE3(i) (((i) >> 16) & 0xff)
+#define ELF64_MIPS_R_TYPE2(i) (((i) >> 8) & 0xff)
+#define ELF64_MIPS_R_TYPE1(i) ((i)&0xff)
+#define ELF64_MIPS_R_TYPEENC(type1, type2, type3, ssym) \
+ ((type1) | ((Elf32_Word)(type2) << 8) | ((Elf32_Word)(type3) << 16) | ((Elf32_Word)(ssym) << 24))
+
+/* Redirect the generic ELF64_R_* accessors to the MIPS-specific layout. */
+#undef ELF64_R_SYM
+#define ELF64_R_SYM(i) ELF64_MIPS_R_SYM(i)
+#undef ELF64_R_TYPE
+
+/* FIXME: the 0x00ff mask keeps only the first relocation type (bits 0-7). */
+#define ELF64_R_TYPE(i) (ELF64_MIPS_R_TYPE(i) & 0x00ff)
+#undef ELF64_R_INFO
+#define ELF64_R_INFO(sym, type) ELF64_MIPS_R_INFO((sym), (type))
+
+#endif
diff --git a/compel/arch/mips/src/lib/include/syscall.h b/compel/arch/mips/src/lib/include/syscall.h
new file mode 100644
index 000000000..6cad7ca73
--- /dev/null
+++ b/compel/arch/mips/src/lib/include/syscall.h
@@ -0,0 +1,7 @@
+#ifndef __COMPEL_SYSCALL_H__
+#define __COMPEL_SYSCALL_H__
+
+/* Fallback: define SIGSTKFLT as 16 when no earlier header has defined it. */
+#ifndef SIGSTKFLT
+#define SIGSTKFLT 16
+#endif
+#endif
diff --git a/compel/arch/mips/src/lib/include/uapi/asm/breakpoints.h b/compel/arch/mips/src/lib/include/uapi/asm/breakpoints.h
new file mode 100644
index 000000000..21eb1309f
--- /dev/null
+++ b/compel/arch/mips/src/lib/include/uapi/asm/breakpoints.h
@@ -0,0 +1,6 @@
+#ifndef __COMPEL_BREAKPOINTS_H__
+#define __COMPEL_BREAKPOINTS_H__
+/* Trap code used for this architecture: an alias for TRAP_BRKPT. */
+#define ARCH_SI_TRAP TRAP_BRKPT
+/* Breakpoint helpers; only declared here, defined elsewhere. */
+extern int ptrace_set_breakpoint(pid_t pid, void *addr);
+extern int ptrace_flush_breakpoints(pid_t pid);
+#endif
diff --git a/compel/arch/mips/src/lib/include/uapi/asm/cpu.h b/compel/arch/mips/src/lib/include/uapi/asm/cpu.h
new file mode 100644
index 000000000..e568df789
--- /dev/null
+++ b/compel/arch/mips/src/lib/include/uapi/asm/cpu.h
@@ -0,0 +1,6 @@
+#ifndef __CR_ASM_CPU_H__
+#define __CR_ASM_CPU_H__
+
+/* CPU info for mips: an empty structure, no fields are recorded. */
+typedef struct {
+} compel_cpuinfo_t;
+#endif /* __CR_ASM_CPU_H__ */
diff --git a/compel/arch/mips/src/lib/include/uapi/asm/fpu.h b/compel/arch/mips/src/lib/include/uapi/asm/fpu.h
new file mode 100644
index 000000000..7f476d541
--- /dev/null
+++ b/compel/arch/mips/src/lib/include/uapi/asm/fpu.h
@@ -0,0 +1,4 @@
+#ifndef __CR_ASM_FPU_H__
+#define __CR_ASM_FPU_H__
+
+#endif /* __CR_ASM_FPU_H__ */
diff --git a/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h b/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h
new file mode 100644
index 000000000..481566a12
--- /dev/null
+++ b/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h
@@ -0,0 +1,68 @@
+#ifndef UAPI_COMPEL_ASM_TYPES_H__
+#define UAPI_COMPEL_ASM_TYPES_H__
+
+#include
+#include
+#include
+#include
+#include
+/* NOTE(review): presumably the highest signal number and the highest one of the older, smaller range -- confirm against users. */
+#define SIGMAX 64
+#define SIGMAX_OLD 31
+
+/*
+ * Copied from the Linux kernel header arch/mips/include/asm/ptrace.h
+ *
+ * CPU context of a MIPS thread: 32 main registers followed by six special
+ * registers, every one stored as a 64-bit value.
+ */
+typedef struct {
+ /* Saved main processor registers. */
+ __u64 regs[32];
+
+ /* Saved special registers. */
+ __u64 lo;
+ __u64 hi;
+ __u64 cp0_epc;
+ __u64 cp0_badvaddr;
+ __u64 cp0_status;
+ __u64 cp0_cause;
+} user_regs_struct_t;
+
+/*
+ * From linux-3.10/arch/mips/kernel/ptrace.c.
+ * FPU context: 32 64-bit registers plus two 32-bit words, fpu_fcr31 and fpu_id.
+ */
+typedef struct {
+ /* Saved fpu registers. */
+ __u64 regs[32];
+
+ __u32 fpu_fcr31;
+ __u32 fpu_id;
+
+} user_fpregs_struct_t;
+
+/* Named aliases for slots of user_regs_struct_t.regs[]. */
+#define MIPS_a0 regs[4] //arguments a0-a3
+#define MIPS_t0 regs[8] //temporaries t0-t7
+#define MIPS_v0 regs[2]
+#define MIPS_v1 regs[3]
+#define MIPS_sp regs[29]
+#define MIPS_ra regs[31]
+
+#define NATIVE_MAGIC 0x0A
+#define COMPAT_MAGIC 0x0C
+/* Always reports the register set as native; @pregs is not inspected. */
+static inline bool user_regs_native(user_regs_struct_t *pregs)
+{
+ return true;
+}
+
+/*
+ * Thread-area/TLS hooks are no-ops on mips: the fetch macros evaluate to 0
+ * and the get_tls macros expand to nothing.
+ */
+#define __compel_arch_fetch_thread_area(tid, th) 0
+#define compel_arch_fetch_thread_area(tctl) 0
+#define compel_arch_get_tls_task(ctl, tls)
+#define compel_arch_get_tls_thread(tctl, tls)
+
+/*
+ * Register accessors: the result and the syscall number both live in v0
+ * (regs[2]), the instruction pointer is cp0_epc and the stack pointer is
+ * regs[29].
+ */
+#define REG_RES(regs) ((regs).MIPS_v0)
+#define REG_IP(regs) ((regs).cp0_epc)
+#define SET_REG_IP(regs, val) ((regs).cp0_epc = (val))
+#define REG_SP(regs) ((regs).MIPS_sp)
+#define REG_SYSCALL_NR(regs) ((regs).MIPS_v0)
+
+/* The @compat argument is ignored: __NR always resolves to the native number. */
+//#define __NR(syscall, compat) ((compat) ? __NR32_##syscall : __NR_##syscall)
+#define __NR(syscall, compat) __NR_##syscall
+
+#endif /* UAPI_COMPEL_ASM_TYPES_H__ */
diff --git a/compel/arch/mips/src/lib/include/uapi/asm/sigframe.h b/compel/arch/mips/src/lib/include/uapi/asm/sigframe.h
new file mode 100644
index 000000000..5d0a0628e
--- /dev/null
+++ b/compel/arch/mips/src/lib/include/uapi/asm/sigframe.h
@@ -0,0 +1,58 @@
+#ifndef UAPI_COMPEL_ASM_SIGFRAME_H__
+#define UAPI_COMPEL_ASM_SIGFRAME_H__
+
+#include
+#include
+
+#include
+#include
+
+#include
+#define u32 __u32
+
+/* sigcontext defined in /usr/include/asm/sigcontext.h*/
+#define rt_sigcontext sigcontext
+
+#include
+
+/*
+ * User context stored inside the signal frame; refer to
+ * linux-3.10/include/uapi/asm-generic/ucontext.h.
+ */
+struct k_ucontext {
+ unsigned long uc_flags;
+ struct k_ucontext *uc_link;
+ stack_t uc_stack;
+ struct sigcontext uc_mcontext;
+ k_rtsigset_t uc_sigmask;
+};
+
+/*
+ * Real-time signal frame layout, copied from the kernel source
+ * arch/mips/kernel/signal.c: two small u32 arrays, then the siginfo and
+ * the user context.
+ */
+struct rt_sigframe {
+ u32 rs_ass[4]; /* argument save space for o32 */
+ u32 rs_pad[2]; /* Was: signal trampoline */
+ siginfo_t rs_info;
+ struct k_ucontext rs_uc;
+};
+
+/*
+ * Accessors for struct rt_sigframe. The frame argument is parenthesized so
+ * that any pointer expression (not just a plain identifier) expands correctly.
+ */
+#define RT_SIGFRAME_UC(rt_sigframe) (&(rt_sigframe)->rs_uc)
+#define RT_SIGFRAME_UC_SIGMASK(rt_sigframe) ((k_rtsigset_t *)(void *)&(rt_sigframe)->rs_uc.uc_sigmask)
+/* Always evaluates to 0; the frame is not consulted. */
+#define RT_SIGFRAME_REGIP(rt_sigframe) ((long unsigned int)0x00)
+/* Expands to nothing. */
+#define RT_SIGFRAME_FPU(rt_sigframe)
+/* Always 1; the frame is not consulted. */
+#define RT_SIGFRAME_HAS_FPU(rt_sigframe) 1
+
+/* Always 0; the frame is not consulted. */
+#define RT_SIGFRAME_OFFSET(rt_sigframe) 0
+
+/*
+ * ARCH_RT_SIGRETURN - move @new_sp into $29, load __NR_rt_sigreturn into $2
+ * and execute the syscall instruction. The @rt_sigframe argument is not
+ * used by the expansion. $2 and memory are declared as clobbered.
+ */
+/* clang-format off */
+#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe) \
+ asm volatile( \
+ "move $29, %0 \n" \
+ "li $2, "__stringify(__NR_rt_sigreturn)" \n" \
+ "syscall \n" \
+ : \
+ : "r"(new_sp) \
+ : "$2","memory")
+/* clang-format on */
+
+int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe);
+
+/*
+ * Zero, or overwrite from @from, the signal mask stored in the frame's user
+ * context; sizeof(k_rtsigset_t) bytes are written. Arguments are
+ * parenthesized so that arbitrary pointer expressions expand correctly.
+ */
+#define rt_sigframe_erase_sigset(sigframe) memset(&(sigframe)->rs_uc.uc_sigmask, 0, sizeof(k_rtsigset_t))
+#define rt_sigframe_copy_sigset(sigframe, from) memcpy(&(sigframe)->rs_uc.uc_sigmask, (from), sizeof(k_rtsigset_t))
+#endif /* UAPI_COMPEL_ASM_SIGFRAME_H__ */
diff --git a/compel/arch/mips/src/lib/include/uapi/asm/siginfo.h b/compel/arch/mips/src/lib/include/uapi/asm/siginfo.h
new file mode 100644
index 000000000..6db1ddbd3
--- /dev/null
+++ b/compel/arch/mips/src/lib/include/uapi/asm/siginfo.h
@@ -0,0 +1,123 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998, 1999, 2001, 2003 Ralf Baechle
+ * Copyright (C) 2000, 2001 Silicon Graphics, Inc.
+ */
+#ifndef _UAPI_ASM_SIGINFO_H
+#define _UAPI_ASM_SIGINFO_H
+
+#define __ARCH_SIGEV_PREAMBLE_SIZE (sizeof(long) + 2 * sizeof(int))
+#undef __ARCH_SI_TRAPNO /* exception code needs to fill this ... */
+
+#define HAVE_ARCH_SIGINFO_T
+
+/*
+ * Careful to keep union _sifields from shifting ...
+ */
+
+#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
+
+#define __ARCH_SIGSYS
+
+/*
+ * siginfo_t is capped at SI_MAX_SIZE bytes; SI_PAD_SIZE is the number of
+ * ints left for the union once the preamble has been subtracted.
+ */
+#define SI_MAX_SIZE 128
+#define SI_PAD_SIZE ((SI_MAX_SIZE - __ARCH_SI_PREAMBLE_SIZE) / sizeof(int))
+#define __ARCH_SI_UID_T __kernel_uid32_t
+
+/* Never taken: __ARCH_SI_UID_T is defined unconditionally just above. */
+#ifndef __ARCH_SI_UID_T
+#define __ARCH_SI_UID_T __kernel_uid32_t
+#endif
+
+/* Fallback types/attributes, used only when not already defined. */
+#ifndef __ARCH_SI_BAND_T
+#define __ARCH_SI_BAND_T long
+#endif
+
+#ifndef __ARCH_SI_CLOCK_T
+#define __ARCH_SI_CLOCK_T __kernel_clock_t
+#endif
+
+#ifndef __ARCH_SI_ATTRIBUTES
+#define __ARCH_SI_ATTRIBUTES
+#endif
+
+/*
+ * MIPS layout of siginfo: three leading ints in the order si_signo,
+ * si_errno, si_code, followed by a union with one member per signal family.
+ * The _pad member sizes the union to SI_PAD_SIZE ints.
+ */
+typedef struct siginfo {
+ int si_signo;
+ int si_errno;
+ int si_code;
+
+ union {
+ int _pad[SI_PAD_SIZE];
+
+ /* kill() */
+ struct {
+ __kernel_pid_t _pid; /* sender's pid */
+ __ARCH_SI_UID_T _uid; /* sender's uid */
+ } _kill;
+
+ /* POSIX.1b timers */
+ struct {
+ __kernel_timer_t _tid; /* timer id */
+ int _overrun; /* overrun count */
+ char _pad[sizeof(__ARCH_SI_UID_T) - sizeof(int)];
+ sigval_t _sigval; /* same as below */
+ int _sys_private; /* not to be passed to user */
+ } _timer;
+
+ /* POSIX.1b signals */
+ struct {
+ __kernel_pid_t _pid; /* sender's pid */
+ __ARCH_SI_UID_T _uid; /* sender's uid */
+ sigval_t _sigval;
+ } _rt;
+
+ /* SIGCHLD */
+ struct {
+ __kernel_pid_t _pid; /* which child */
+ __ARCH_SI_UID_T _uid; /* sender's uid */
+ int _status; /* exit code */
+ __ARCH_SI_CLOCK_T _utime;
+ __ARCH_SI_CLOCK_T _stime;
+ } _sigchld;
+
+ /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+ struct {
+ void *_addr; /* faulting insn/memory ref. */
+#ifdef __ARCH_SI_TRAPNO
+ int _trapno; /* TRAP # which caused the signal */
+#endif
+ short _addr_lsb; /* LSB of the reported address */
+#ifndef __GENKSYMS__
+ struct {
+ void *_lower;
+ void *_upper;
+ } _addr_bnd;
+#endif
+ } _sigfault;
+
+ /* SIGPOLL */
+ struct {
+ __ARCH_SI_BAND_T _band; /* POLL_IN, POLL_OUT, POLL_MSG */
+ int _fd;
+ } _sigpoll;
+
+ /* SIGSYS */
+ struct {
+ void *_call_addr; /* calling user insn */
+ int _syscall; /* triggering system call number */
+ unsigned int _arch; /* AUDIT_ARCH_* of syscall */
+ } _sigsys;
+ } _sifields;
+} __ARCH_SI_ATTRIBUTES siginfo_t;
+
+/*
+ * si_code values
+ * Again these have been chosen to be IRIX compatible.
+ *
+ * SI_ASYNCIO, SI_TIMER and SI_MESGQ are undefined first; only SI_ASYNCIO
+ * is given a new value here.
+ * NOTE(review): SI_TIMER and SI_MESGQ stay undefined after this header --
+ * confirm that this is intended.
+ */
+#undef SI_ASYNCIO
+#undef SI_TIMER
+#undef SI_MESGQ
+#define SI_ASYNCIO -2 /* sent by AIO completion */
+
+#endif /* _UAPI_ASM_SIGINFO_H */
diff --git a/compel/arch/mips/src/lib/infect.c b/compel/arch/mips/src/lib/infect.c
new file mode 100644
index 000000000..a1d4865cc
--- /dev/null
+++ b/compel/arch/mips/src/lib/infect.c
@@ -0,0 +1,310 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include "errno.h"
+#include
+#include
+#include "common/err.h"
+#include "common/page.h"
+#include "asm/infect-types.h"
+#include "ptrace.h"
+#include "infect.h"
+#include "infect-priv.h"
+#include "log.h"
+#include "common/bug.h"
+
+/*
+ * Injected syscall instruction sequence.
+ *
+ * Two 4-byte instruction words, each written least-significant byte first
+ * because mips64el is little endian: a syscall followed by a break.
+ */
+const char code_syscall[] = {
+ 0x0c, 0x00, 0x00, 0x00, /* syscall */
+ 0x0d, 0x00, 0x00, 0x00 /* break */
+};
+
+/*
+ * 10-byte legacy floating point register: four 16-bit significand words
+ * followed by one 16-bit exponent word.
+ * NOTE(review): nothing else in this file refers to struct fpreg - confirm
+ * it is still needed.
+ */
+struct fpreg {
+ uint16_t significand[4];
+ uint16_t exponent;
+};
+
+/*
+ * 16-byte floating point register: the same significand/exponent words as
+ * the 10-byte form, followed by three 16-bit padding words.
+ * NOTE(review): nothing else in this file refers to struct fpxreg - confirm
+ * it is still needed.
+ */
+struct fpxreg {
+ uint16_t significand[4];
+ uint16_t exponent;
+ uint16_t padding[3];
+};
+
+/*
+ * Fill the machine context of @sigframe from dumped register sets.
+ *
+ * Copies the 32 general-purpose registers, lo, hi and cp0_epc from @regs,
+ * then the 32 floating point registers from @fpregs, into
+ * sigframe->rs_uc.uc_mcontext. Always returns 0.
+ */
+int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
+{
+	int idx;
+
+	/* General-purpose registers, slot for slot. */
+	for (idx = 0; idx < 32; idx++)
+		sigframe->rs_uc.uc_mcontext.sc_regs[idx] = regs->regs[idx];
+
+	/* Multiply/divide result registers and the program counter. */
+	sigframe->rs_uc.uc_mcontext.sc_mdlo = regs->lo;
+	sigframe->rs_uc.uc_mcontext.sc_mdhi = regs->hi;
+	sigframe->rs_uc.uc_mcontext.sc_pc = regs->cp0_epc;
+
+	/* Floating point registers, slot for slot. */
+	for (idx = 0; idx < 32; idx++)
+		sigframe->rs_uc.uc_mcontext.sc_fpregs[idx] = fpregs->regs[idx];
+
+	return 0;
+}
+
+/*
+ * FPU part of sigframe preparation. Nothing is done here: neither frame is
+ * touched and 0 is always returned.
+ */
+int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe)
+{
+ return 0;
+}
+
+/*
+ * Read the FPU registers of task @pid through ptrace, adjust @regs when the
+ * task was caught in a system call that has to be restarted, and pass both
+ * register sets to the @save callback together with @arg.
+ *
+ * Returns -1 if the FPU registers cannot be read, otherwise whatever @save
+ * returns. @flags is not used.
+ */
+int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *xs, save_regs_t save,
+			 void *arg, __maybe_unused unsigned long flags)
+{
+	long v0;
+
+	pr_info("Dumping GP/FPU registers for %d\n", pid);
+
+	if (ptrace(PTRACE_GETFPREGS, pid, NULL, xs) != 0) {
+		pr_perror("Can't obtain FPU registers for %d", pid);
+		return -1;
+	}
+
+	/* regs[0] == 0: no restart handling needed, hand the registers over as-is. */
+	if (!regs->regs[0])
+		return save(pid, arg, regs, xs);
+
+	/* Restart the system call. */
+	v0 = (long)(int)regs->regs[2];
+	if (v0 == ERESTARTNOHAND || v0 == ERESTARTSYS || v0 == ERESTARTNOINTR) {
+		/*
+		 * Reload regs[2] from regs[0] and regs[7] from regs[26], then
+		 * step the pc back by one 4-byte instruction.
+		 */
+		regs->regs[2] = regs->regs[0];
+		regs->regs[7] = regs->regs[26];
+		regs->cp0_epc -= 4;
+	} else if (v0 == ERESTART_RESTARTBLOCK) {
+		pr_warn("Will restore %d with interrupted system call\n", pid);
+		regs->regs[2] = -EINTR;
+	}
+	regs->regs[0] = 0;
+
+	return save(pid, arg, regs, xs);
+}
+
+/*
+ * Write the floating point register set @ext_regs into task @pid with
+ * PTRACE_SETFPREGS. Returns 0 on success and -1 (after logging) on failure.
+ */
+int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
+{
+	long rc;
+
+	pr_info("Restoring GP/FPU registers for %d\n", pid);
+
+	rc = ptrace(PTRACE_SETFPREGS, pid, NULL, ext_regs);
+	if (rc == 0)
+		return 0;
+
+	pr_perror("Can't set FPU registers for %d", pid);
+	return -1;
+}
+
+/*
+ * Run system call @nr with up to six arguments inside the task described
+ * by @ctl.
+ *
+ * Works on a copy of the registers saved in ctl->orig: the syscall number is
+ * placed in regs[2] (v0) and the arguments in regs[4]..regs[9]; refer to
+ * glibc-2.20/sysdeps/unix/sysv/linux/mips/mips64/syscall.S.
+ *
+ * The value found in regs[2] afterwards is stored in *@ret (also when the
+ * execution itself failed). Returns the status of compel_execute_syscall().
+ */
+int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, unsigned long arg1, unsigned long arg2,
+		   unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6)
+{
+	user_regs_struct_t regs = ctl->orig.regs;
+	int err;
+
+	regs.regs[2] = (unsigned long)nr; /* syscall number goes in v0 */
+	regs.regs[4] = arg1;
+	regs.regs[5] = arg2;
+	regs.regs[6] = arg3;
+	regs.regs[7] = arg4;
+	regs.regs[8] = arg5;
+	regs.regs[9] = arg6;
+
+	/* Pass the address of the local register copy so the result can be read back. */
+	err = compel_execute_syscall(ctl, &regs, code_syscall);
+	*ret = regs.regs[2];
+
+	return err;
+}
+
+/*
+ * Execute mmap() inside the task described by @ctl.
+ *
+ * Returns the address of the new mapping, or NULL when either the syscall
+ * could not be executed or its result is an error value.
+ *
+ * NOTE(review): @offset is passed shifted right by PAGE_SHIFT, i.e. in pages -
+ * confirm that __NR_mmap on this ABI expects pages rather than bytes.
+ */
+void *remote_mmap(struct parasite_ctl *ctl, void *addr, size_t length, int prot, int flags, int fd, off_t offset)
+{
+	long map;
+	int err;
+
+	err = compel_syscall(ctl, __NR_mmap, &map, (unsigned long)addr, length, prot, flags, fd, offset >> PAGE_SHIFT);
+
+	/*
+	 * If the syscall could not be run at all, map does not hold an mmap()
+	 * result, so it must not be decoded with strerror().
+	 */
+	if (err < 0) {
+		pr_err("remote mmap() failed: can't execute the syscall (%d)\n", err);
+		return NULL;
+	}
+
+	if (IS_ERR_VALUE(map)) {
+		pr_err("remote mmap() failed: %s\n", strerror(-map));
+		return NULL;
+	}
+
+	return (void *)map;
+}
+
+/*
+ * Point @regs at a new program counter and, optionally, a new stack.
+ *
+ * cp0_epc is always set to @new_ip; regs[29] (sp) is replaced only when
+ * @stack is non-NULL. regs must be initialized when this function is called
+ * from the original context.
+ */
+void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs)
+{
+	regs->cp0_epc = new_ip;
+
+	/* No stack given: keep the current stack pointer. */
+	if (!stack)
+		return;
+
+	/* regs[29] is sp */
+	regs->regs[29] = (unsigned long)stack;
+}
+
+/* Always reports true; @ctl is not examined. */
+bool arch_can_dump_task(struct parasite_ctl *ctl)
+{
+ return true;
+}
+
+/*
+ * Run sigaltstack() inside the task described by @ctl, with a zero first
+ * argument and the address of s->rs_uc.uc_stack as the second one.
+ *
+ * Returns the compel_syscall() status when that is non-zero, otherwise the
+ * value the system call itself produced.
+ */
+int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s)
+{
+	long sys_ret;
+	int rc;
+
+	rc = compel_syscall(ctl, __NR_sigaltstack, &sys_ret, 0, (unsigned long)&s->rs_uc.uc_stack, 0, 0, 0, 0);
+	if (rc)
+		return rc;
+
+	return sys_ret;
+}
+
+/*
+ * Stub: returns 0 without using @pid or @addr.
+ * NOTE(review): presumably breakpoints are not implemented for this
+ * architecture - confirm how callers interpret a 0 result.
+ */
+int ptrace_set_breakpoint(pid_t pid, void *addr)
+{
+ return 0;
+}
+
+/* Stub: returns 0 without using @pid. */
+int ptrace_flush_breakpoints(pid_t pid)
+{
+ return 0;
+}
+
+/* Refer to kernel linux-3.10/arch/mips/include/asm/processor.h */
+#define TASK_SIZE32 0x7fff8000UL
+#define TASK_SIZE64 0x10000000000UL
+/* The 64-bit value (2^40) is the one in effect; TASK_SIZE32 is not referenced in this file. */
+#define TASK_SIZE TASK_SIZE64
+
+/* Returns TASK_SIZE, i.e. the TASK_SIZE64 constant defined above. */
+unsigned long compel_task_size(void)
+{
+ return TASK_SIZE;
+}
+
+/*
+ * Get task registers (overrides the weak default function).
+ *
+ * Thin wrapper: issues PTRACE_GETREGS for @pid with @regs as the data
+ * argument and returns the ptrace() result.
+ */
+int ptrace_get_regs(int pid, user_regs_struct_t *regs)
+{
+ return ptrace(PTRACE_GETREGS, pid, NULL, regs);
+}
+
+/*
+ * Set task registers (overrides the weak default function).
+ *
+ * Thin wrapper: issues PTRACE_SETREGS for @pid with @regs as the data
+ * argument and returns the ptrace() result.
+ */
+int ptrace_set_regs(int pid, user_regs_struct_t *regs)
+{
+ return ptrace(PTRACE_SETREGS, pid, NULL, regs);
+}
+
+/*
+ * Apply load-time relocations to a blob.
+ *
+ * @mem   - start of the blob in our address space; patched in place
+ * @vbase - address the blob is relocated against
+ * @pbd   - blob descriptor supplying the relocation table
+ *
+ * parasite.built-in.o and restorer.built-in.o are ELF 64-bit LSB relocatable
+ * objects for mips, so the R_MIPS_26, R_MIPS_64, R_MIPS_HI16, R_MIPS_LO16,
+ * R_MIPS_HIGHER and R_MIPS_HIGHEST relocations found in them have to be
+ * resolved here: on mips64el the instructions that load/store data or jump
+ * carry pieces of the target address.
+ *
+ * For every type except the 64-bit one the computed field is OR-ed into the
+ * existing word at the relocation offset; this assumes the field is zero
+ * beforehand - TODO confirm. An entry of any other type triggers BUG().
+ */
+void compel_relocs_apply_mips(void *mem, void *vbase, struct parasite_blob_desc *pbd)
+{
+	compel_reloc_t *elf_relocs = pbd->hdr.relocs;
+	size_t nr_relocs = pbd->hdr.nr_relocs;
+	size_t i;
+
+	for (i = 0; i < nr_relocs; i++) {
+		if (elf_relocs[i].type & COMPEL_TYPE_MIPS_26) {
+			/* Jump target: (addend + low 28 bits of vbase), in words. */
+			int *where = (mem + elf_relocs[i].offset);
+			*where = *where |
+				 ((elf_relocs[i].addend + ((unsigned long)vbase & 0x00fffffff) /*low 28 bit*/) >> 2);
+		} else if (elf_relocs[i].type & COMPEL_TYPE_MIPS_64) {
+			/* Full 64-bit address: overwrite the slot. */
+			unsigned long *where = (mem + elf_relocs[i].offset);
+			*where = elf_relocs[i].addend + (unsigned long)vbase;
+		} else if (elf_relocs[i].type & COMPEL_TYPE_MIPS_HI16) {
+			/* refer to binutils mips.cc */
+			int *where = (mem + elf_relocs[i].offset);
+			int v_lo16 = (unsigned long)vbase & 0x00ffff;
+
+			/* Bits 16..31 of vbase, plus one when the low half reaches 0x8000. */
+			if ((v_lo16 + elf_relocs[i].value + elf_relocs[i].addend) >= 0x8000) {
+				*where = *where | ((((unsigned long)vbase >> 16) & 0xffff) + 0x1);
+			} else {
+				*where = *where | ((((unsigned long)vbase >> 16) & 0xffff));
+			}
+		} else if (elf_relocs[i].type & COMPEL_TYPE_MIPS_LO16) {
+			/* Low 16 bits of (vbase's low half + addend). */
+			int *where = (mem + elf_relocs[i].offset);
+			int v_lo16 = (unsigned long)vbase & 0x00ffff;
+			*where = *where | ((v_lo16 + elf_relocs[i].addend) & 0xffff);
+		} else if (elf_relocs[i].type & COMPEL_TYPE_MIPS_HIGHER) {
+			/* Bits 32..47 of vbase after adding the rounding constant. */
+			int *where = (mem + elf_relocs[i].offset);
+			*where = *where | ((((unsigned long)vbase + (uint64_t)0x80008000) >> 32) & 0xffff);
+		} else if (elf_relocs[i].type & COMPEL_TYPE_MIPS_HIGHEST) {
+			/* Bits 48..63 of vbase after adding the rounding constant. */
+			int *where = (mem + elf_relocs[i].offset);
+			*where = *where | ((((unsigned long)vbase + (uint64_t)0x800080008000llu) >> 48) & 0xffff);
+		} else {
+			BUG();
+		}
+	}
+}
diff --git a/compel/arch/ppc64/plugins/include/asm/syscall-types.h b/compel/arch/ppc64/plugins/include/asm/syscall-types.h
index 7754721e2..1bea8496b 100644
--- a/compel/arch/ppc64/plugins/include/asm/syscall-types.h
+++ b/compel/arch/ppc64/plugins/include/asm/syscall-types.h
@@ -1,7 +1,7 @@
#ifndef COMPEL_ARCH_SYSCALL_TYPES_H__
#define COMPEL_ARCH_SYSCALL_TYPES_H__
-#define SA_RESTORER 0x04000000U
+#define SA_RESTORER 0x04000000U
typedef void rt_signalfn_t(int, siginfo_t *, void *);
typedef rt_signalfn_t *rt_sighandler_t;
@@ -9,20 +9,20 @@ typedef rt_signalfn_t *rt_sighandler_t;
typedef void rt_restorefn_t(void);
typedef rt_restorefn_t *rt_sigrestore_t;
-#define _KNSIG 64
-#define _NSIG_BPW 64
+#define _KNSIG 64
+#define _NSIG_BPW 64
-#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW)
+#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW)
typedef struct {
- unsigned long sig[_KNSIG_WORDS];
+ unsigned long sig[_KNSIG_WORDS];
} k_rtsigset_t;
typedef struct {
- rt_sighandler_t rt_sa_handler;
- unsigned long rt_sa_flags;
- rt_sigrestore_t rt_sa_restorer;
- k_rtsigset_t rt_sa_mask;
+ rt_sighandler_t rt_sa_handler;
+ unsigned long rt_sa_flags;
+ rt_sigrestore_t rt_sa_restorer;
+ k_rtsigset_t rt_sa_mask;
} rt_sigaction_t;
#endif /* COMPEL_ARCH_SYSCALL_TYPES_H__ */
diff --git a/compel/arch/ppc64/plugins/std/parasite-head.S b/compel/arch/ppc64/plugins/std/parasite-head.S
index c870efdc2..c675ab508 100644
--- a/compel/arch/ppc64/plugins/std/parasite-head.S
+++ b/compel/arch/ppc64/plugins/std/parasite-head.S
@@ -4,10 +4,6 @@
.align 8
ENTRY(__export_parasite_head_start)
-
- // int __used parasite_service(unsigned int cmd, void *args)
- // cmd = r3 = *__export_parasite_cmd (u32 ?)
- // args = r4 = @parasite_args_ptr + @pc
bl 0f
0: mflr r2
@@ -15,12 +11,6 @@ ENTRY(__export_parasite_head_start)
addis reg,r2,(name - 0b)@ha; \
addi reg,r2,(name - 0b)@l;
- LOAD_REG_ADDR(r3,__export_parasite_cmd)
- lwz r3,0(r3)
-
- LOAD_REG_ADDR(r4,parasite_args_ptr)
- ld r4,0(r4)
-
LOAD_REG_ADDR(r12,parasite_service_ptr)
ld r12,0(r12)
mtctr r12
@@ -28,9 +18,6 @@ ENTRY(__export_parasite_head_start)
bctrl // call parasite_service
twi 31,0,0 // Should generate SIGTRAP
-parasite_args_ptr:
- .quad __export_parasite_args
-
parasite_service_ptr:
// We want to run the function prototype to set r2.
// Since the relocation will prefer the local entry
@@ -39,7 +26,4 @@ parasite_service_ptr:
// FIXME: There should be a way to specify the global entry here.
.quad parasite_service - 8
-__export_parasite_cmd:
- .long 0
-
END(__export_parasite_head_start)
diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
index 3b3079040..3deb41cf7 100644
--- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
+++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
@@ -82,7 +82,7 @@ __NR_sys_timer_settime 241 sys_timer_settime (kernel_timer_t timer_id, int flag
__NR_sys_timer_gettime 242 sys_timer_gettime (int timer_id, const struct itimerspec *setting)
__NR_sys_timer_getoverrun 243 sys_timer_getoverrun (int timer_id)
__NR_sys_timer_delete 244 sys_timer_delete (kernel_timer_t timer_id)
-__NR_clock_gettime 246 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp)
+__NR_clock_gettime 246 sys_clock_gettime (clockid_t which_clock, struct timespec *tp)
__NR_exit_group 234 sys_exit_group (int error_code)
__NR_waitid 272 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
__NR_set_robust_list 300 sys_set_robust_list (struct robust_list_head *head, size_t len)
@@ -108,3 +108,15 @@ __NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz)
__NR_preadv 320 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h)
__NR_userfaultfd 364 sys_userfaultfd (int flags)
__NR_ppoll 281 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize)
+__NR_open_tree 428 sys_open_tree (int dirfd, const char *pathname, unsigned int flags)
+__NR_move_mount 429 sys_move_mount (int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, int flags)
+__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags)
+__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux)
+__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags)
+__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
+__NR_close_range 436 sys_close_range (unsigned int fd, unsigned int max_fd, unsigned int flags)
+__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
+__NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size)
+__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
+__NR_rseq 387 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+__NR_membarrier 365 sys_membarrier (int cmd, unsigned int flags, int cpu_id)
diff --git a/compel/arch/ppc64/scripts/compel-pack.lds.S b/compel/arch/ppc64/scripts/compel-pack.lds.S
index e0f826d7d..f197fb999 100644
--- a/compel/arch/ppc64/scripts/compel-pack.lds.S
+++ b/compel/arch/ppc64/scripts/compel-pack.lds.S
@@ -12,7 +12,7 @@ SECTIONS
*(.compel.init)
}
- .data : {
+ .data : ALIGN(0x10000) {
*(.data*)
*(.bss*)
}
@@ -33,8 +33,4 @@ SECTIONS
*(.group*)
*(.eh_frame*)
}
-
-/* Parasite args should have 4 bytes align, as we have futex inside. */
-. = ALIGN(4);
-__export_parasite_args = .;
}
diff --git a/compel/arch/ppc64/src/lib/cpu.c b/compel/arch/ppc64/src/lib/cpu.c
index 338ab4891..f7a128ca3 100644
--- a/compel/arch/ppc64/src/lib/cpu.c
+++ b/compel/arch/ppc64/src/lib/cpu.c
@@ -2,6 +2,7 @@
#include
#include
#include
+#include
#include "compel-cpu.h"
@@ -9,7 +10,7 @@
#include "log.h"
-#undef LOG_PREFIX
+#undef LOG_PREFIX
#define LOG_PREFIX "cpu: "
static compel_cpuinfo_t rt_info;
@@ -24,10 +25,20 @@ static void fetch_rt_cpuinfo(void)
}
}
-void compel_set_cpu_cap(compel_cpuinfo_t *info, unsigned int feature) { }
-void compel_clear_cpu_cap(compel_cpuinfo_t *info, unsigned int feature) { }
-int compel_test_fpu_cap(compel_cpuinfo_t *info, unsigned int feature) { return 0; }
-int compel_test_cpu_cap(compel_cpuinfo_t *info, unsigned int feature) { return 0; }
+/* No-op: the body is empty, so @info and @feature are ignored. */
+void compel_set_cpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+}
+/* No-op: the body is empty, so @info and @feature are ignored. */
+void compel_clear_cpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+}
+/* Always returns 0, whatever @info and @feature are. */
+int compel_test_fpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+ return 0;
+}
+/* Always returns 0, whatever @info and @feature are. */
+int compel_test_cpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+ return 0;
+}
int compel_cpuid(compel_cpuinfo_t *info)
{
diff --git a/compel/arch/ppc64/src/lib/handle-elf.c b/compel/arch/ppc64/src/lib/handle-elf.c
index 3d4020f59..84a360c43 100644
--- a/compel/arch/ppc64/src/lib/handle-elf.c
+++ b/compel/arch/ppc64/src/lib/handle-elf.c
@@ -1,20 +1,17 @@
#include
-
-#include "uapi/compel.h"
+#include
#include "handle-elf.h"
#include "piegen.h"
#include "log.h"
-static const unsigned char __maybe_unused
-elf_ident_64_le[EI_NIDENT] = {
- 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00,
+static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = {
+ 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
-static const unsigned char __maybe_unused
-elf_ident_64_be[EI_NIDENT] = {
- 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x02, 0x01, 0x00,
+static const unsigned char __maybe_unused elf_ident_64_be[EI_NIDENT] = {
+ 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x02, 0x01, 0x00, /* clang-format */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
diff --git a/compel/arch/ppc64/src/lib/include/handle-elf.h b/compel/arch/ppc64/src/lib/include/handle-elf.h
index 1a8217e6b..ae20186a2 100644
--- a/compel/arch/ppc64/src/lib/include/handle-elf.h
+++ b/compel/arch/ppc64/src/lib/include/handle-elf.h
@@ -5,8 +5,8 @@
#define ELF_PPC64
-#define __handle_elf handle_elf_ppc64
-#define arch_is_machine_supported(e_machine) (e_machine == EM_PPC64)
+#define __handle_elf handle_elf_ppc64
+#define arch_is_machine_supported(e_machine) (e_machine == EM_PPC64)
extern int handle_elf_ppc64(void *mem, size_t size);
diff --git a/compel/arch/ppc64/src/lib/include/syscall.h b/compel/arch/ppc64/src/lib/include/syscall.h
index e2ec1272e..13ee906e1 100644
--- a/compel/arch/ppc64/src/lib/include/syscall.h
+++ b/compel/arch/ppc64/src/lib/include/syscall.h
@@ -1,4 +1,8 @@
#ifndef __COMPEL_SYSCALL_H__
#define __COMPEL_SYSCALL_H__
-#define __NR(syscall, compat) __NR_##syscall
+/*
+ * Yields __NR_<syscall>. The compat argument is only evaluated and cast to
+ * void inside the statement expression; it does not affect the result.
+ */
+#define __NR(syscall, compat) \
+ ({ \
+ (void)compat; \
+ __NR_##syscall; \
+ })
#endif
diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/cpu.h b/compel/arch/ppc64/src/lib/include/uapi/asm/cpu.h
index 59925868c..475e2bd59 100644
--- a/compel/arch/ppc64/src/lib/include/uapi/asm/cpu.h
+++ b/compel/arch/ppc64/src/lib/include/uapi/asm/cpu.h
@@ -4,7 +4,7 @@
#include
typedef struct {
- uint64_t hwcap[2];
+ uint64_t hwcap[2];
} compel_cpuinfo_t;
#endif /* UAPI_COMPEL_ASM_CPU_H__ */
diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h
index 89fc4aa3c..25fc747e2 100644
--- a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h
+++ b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h
@@ -5,8 +5,8 @@
#include
#include
-#define SIGMAX_OLD 31
-#define SIGMAX 64
+#define SIGMAX_OLD 31
+#define SIGMAX 64
/*
* Copied from kernel header arch/powerpc/include/uapi/asm/ptrace.h
@@ -15,44 +15,44 @@ typedef struct {
unsigned long gpr[32];
unsigned long nip;
unsigned long msr;
- unsigned long orig_gpr3; /* Used for restarting system calls */
+ unsigned long orig_gpr3; /* Used for restarting system calls */
unsigned long ctr;
unsigned long link;
unsigned long xer;
unsigned long ccr;
- unsigned long softe; /* Soft enabled/disabled */
- unsigned long trap; /* Reason for being here */
+ unsigned long softe; /* Soft enabled/disabled */
+ unsigned long trap; /* Reason for being here */
/*
* N.B. for critical exceptions on 4xx, the dar and dsisr
* fields are overloaded to hold srr0 and srr1.
*/
- unsigned long dar; /* Fault registers */
- unsigned long dsisr; /* on 4xx/Book-E used for ESR */
- unsigned long result; /* Result of a system call */
+ unsigned long dar; /* Fault registers */
+ unsigned long dsisr; /* on 4xx/Book-E used for ESR */
+ unsigned long result; /* Result of a system call */
} user_regs_struct_t;
-#define NVSXREG 32
+#define NVSXREG 32
-#define USER_FPREGS_FL_FP 0x00001
-#define USER_FPREGS_FL_ALTIVEC 0x00002
-#define USER_FPREGS_FL_VSX 0x00004
-#define USER_FPREGS_FL_TM 0x00010
+#define USER_FPREGS_FL_FP 0x00001
+#define USER_FPREGS_FL_ALTIVEC 0x00002
+#define USER_FPREGS_FL_VSX 0x00004
+#define USER_FPREGS_FL_TM 0x00010
#ifndef NT_PPC_TM_SPR
-# define NT_PPC_TM_CGPR 0x108 /* TM checkpointed GPR Registers */
-# define NT_PPC_TM_CFPR 0x109 /* TM checkpointed FPR Registers */
-# define NT_PPC_TM_CVMX 0x10a /* TM checkpointed VMX Registers */
-# define NT_PPC_TM_CVSX 0x10b /* TM checkpointed VSX Registers */
-# define NT_PPC_TM_SPR 0x10c /* TM Special Purpose Registers */
+#define NT_PPC_TM_CGPR 0x108 /* TM checkpointed GPR Registers */
+#define NT_PPC_TM_CFPR 0x109 /* TM checkpointed FPR Registers */
+#define NT_PPC_TM_CVMX 0x10a /* TM checkpointed VMX Registers */
+#define NT_PPC_TM_CVSX 0x10b /* TM checkpointed VSX Registers */
+#define NT_PPC_TM_SPR 0x10c /* TM Special Purpose Registers */
#endif
-#define MSR_TMA (1UL<<34) /* bit 29 Trans Mem state: Transactional */
-#define MSR_TMS (1UL<<33) /* bit 30 Trans Mem state: Suspended */
-#define MSR_TM (1UL<<32) /* bit 31 Trans Mem Available */
-#define MSR_VEC (1UL<<25)
-#define MSR_VSX (1UL<<23)
+#define MSR_TMA (1UL << 34) /* bit 29 Trans Mem state: Transactional */
+#define MSR_TMS (1UL << 33) /* bit 30 Trans Mem state: Suspended */
+#define MSR_TM (1UL << 32) /* bit 31 Trans Mem Available */
+#define MSR_VEC (1UL << 25)
+#define MSR_VSX (1UL << 23)
-#define MSR_TM_ACTIVE(x) ((((x) & MSR_TM) && ((x)&(MSR_TMA|MSR_TMS))) != 0)
+#define MSR_TM_ACTIVE(x) ((((x)&MSR_TM) && ((x) & (MSR_TMA | MSR_TMS))) != 0)
typedef struct {
uint64_t fpregs[NFPREG];
@@ -72,15 +72,25 @@ typedef struct {
} tm;
} user_fpregs_struct_t;
-#define REG_RES(regs) ((uint64_t)(regs).gpr[3])
-#define REG_IP(regs) ((uint64_t)(regs).nip)
-#define REG_SP(regs) ((uint64_t)(regs).gpr[1])
-#define REG_SYSCALL_NR(regs) ((uint64_t)(regs).gpr[0])
+#define REG_RES(regs) ((uint64_t)(regs).gpr[3])
+#define REG_IP(regs) ((uint64_t)(regs).nip)
+#define SET_REG_IP(regs, val) ((regs).nip = (val))
+#define REG_SP(regs) ((uint64_t)(regs).gpr[1])
+#define REG_SYSCALL_NR(regs) ((uint64_t)(regs).gpr[0])
-#define user_regs_native(pregs) true
+#define user_regs_native(pregs) true
#define ARCH_SI_TRAP TRAP_BRKPT
-#define __NR(syscall, compat) __NR_##syscall
+/*
+ * Yields __NR_<syscall>. The compat argument is only evaluated and cast to
+ * void inside the statement expression; it does not affect the result.
+ */
+#define __NR(syscall, compat) \
+ ({ \
+ (void)compat; \
+ __NR_##syscall; \
+ })
+
+/*
+ * Thread-area/TLS hooks: the two fetch macros expand to the constant 0 and
+ * the two get_tls macros expand to nothing, so all four ignore their arguments.
+ */
+#define __compel_arch_fetch_thread_area(tid, th) 0
+#define compel_arch_fetch_thread_area(tctl) 0
+#define compel_arch_get_tls_task(ctl, tls)
+#define compel_arch_get_tls_thread(tctl, tls)
#endif /* UAPI_COMPEL_ASM_TYPES_H__ */
diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h
index 9467a1b99..0c4ccb648 100644
--- a/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h
+++ b/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h
@@ -14,35 +14,41 @@
*/
#include
-// XXX: the idetifier rt_sigcontext is expected to be struct by the CRIU code
+// XXX: the identifier rt_sigcontext is expected to be struct by the CRIU code
#define rt_sigcontext sigcontext
#include
-#define RT_SIGFRAME_OFFSET(rt_sigframe) 0
+#define RT_SIGFRAME_OFFSET(rt_sigframe) 0
/* Copied from the Linux kernel header arch/powerpc/include/asm/ptrace.h */
-#define USER_REDZONE_SIZE 512
+#define USER_REDZONE_SIZE 512
+#if _CALL_ELF != 2
+#error Only supporting ABIv2.
+#else
+#define STACK_FRAME_MIN_SIZE 32
+#endif
/* Copied from the Linux kernel source file arch/powerpc/kernel/signal_64.c */
-#define TRAMP_SIZE 6
+#define TRAMP_SIZE 6
/*
* ucontext_t defined in /usr/include/powerpc64le-linux-gnu/sys/ucontext.h
*/
struct rt_sigframe {
- /* sys_rt_sigreturn requires the ucontext be the first field */
- ucontext_t uc;
- ucontext_t uc_transact; /* Transactional state */
- unsigned long _unused[2];
- unsigned int tramp[TRAMP_SIZE];
- struct rt_siginfo *pinfo;
- void *puc;
- struct rt_siginfo info;
- /* New 64 bit little-endian ABI allows redzone of 512 bytes below sp */
- char abigap[USER_REDZONE_SIZE];
+ /* sys_rt_sigreturn requires the ucontext be the first field */
+ ucontext_t uc;
+ ucontext_t uc_transact; /* Transactional state */
+ unsigned long _unused[2];
+ unsigned int tramp[TRAMP_SIZE];
+ struct rt_siginfo *pinfo;
+ void *puc;
+ struct rt_siginfo info;
+ /* New 64 bit little-endian ABI allows redzone of 512 bytes below sp */
+ char abigap[USER_REDZONE_SIZE];
} __attribute__((aligned(16)));
+/* clang-format off */
#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe) \
asm volatile( \
"mr 1, %0 \n" \
@@ -50,30 +56,29 @@ struct rt_sigframe {
"sc \n" \
: \
: "r"(new_sp) \
- : "1", "memory")
+ : "memory")
+/* clang-format on */
#if _CALL_ELF != 2
-# error Only supporting ABIv2.
+#error Only supporting ABIv2.
#else
-# define FRAME_MIN_SIZE_PARM 96
+#define FRAME_MIN_SIZE_PARM 96
#endif
-#define RT_SIGFRAME_UC(rt_sigframe) (&(rt_sigframe)->uc)
-#define RT_SIGFRAME_REGIP(rt_sigframe) ((long unsigned int)(rt_sigframe)->uc.uc_mcontext.gp_regs[PT_NIP])
-#define RT_SIGFRAME_HAS_FPU(rt_sigframe) (1)
-#define RT_SIGFRAME_FPU(rt_sigframe) (&(rt_sigframe)->uc.uc_mcontext)
+#define RT_SIGFRAME_UC(rt_sigframe) (&(rt_sigframe)->uc)
+#define RT_SIGFRAME_REGIP(rt_sigframe) ((long unsigned int)(rt_sigframe)->uc.uc_mcontext.gp_regs[PT_NIP])
+#define RT_SIGFRAME_HAS_FPU(rt_sigframe) (1)
+#define RT_SIGFRAME_FPU(rt_sigframe) (&(rt_sigframe)->uc.uc_mcontext)
-#define rt_sigframe_erase_sigset(sigframe) \
- memset(&sigframe->uc.uc_sigmask, 0, sizeof(k_rtsigset_t))
-#define rt_sigframe_copy_sigset(sigframe, from) \
- memcpy(&sigframe->uc.uc_sigmask, from, sizeof(k_rtsigset_t))
+#define rt_sigframe_erase_sigset(sigframe) memset(&sigframe->uc.uc_sigmask, 0, sizeof(k_rtsigset_t))
+#define rt_sigframe_copy_sigset(sigframe, from) memcpy(&sigframe->uc.uc_sigmask, from, sizeof(k_rtsigset_t))
-#define MSR_TMA (1UL<<34) /* bit 29 Trans Mem state: Transactional */
-#define MSR_TMS (1UL<<33) /* bit 30 Trans Mem state: Suspended */
-#define MSR_TM (1UL<<32) /* bit 31 Trans Mem Available */
-#define MSR_VEC (1UL<<25)
-#define MSR_VSX (1UL<<23)
+#define MSR_TMA (1UL << 34) /* bit 29 Trans Mem state: Transactional */
+#define MSR_TMS (1UL << 33) /* bit 30 Trans Mem state: Suspended */
+#define MSR_TM (1UL << 32) /* bit 31 Trans Mem Available */
+#define MSR_VEC (1UL << 25)
+#define MSR_VSX (1UL << 23)
-#define MSR_TM_ACTIVE(x) ((((x) & MSR_TM) && ((x)&(MSR_TMA|MSR_TMS))) != 0)
+#define MSR_TM_ACTIVE(x) ((((x)&MSR_TM) && ((x) & (MSR_TMA | MSR_TMS))) != 0)
#endif /* UAPI_COMPEL_ASM_SIGFRAME_H__ */
diff --git a/compel/arch/ppc64/src/lib/infect.c b/compel/arch/ppc64/src/lib/infect.c
index defed3d85..54abd48a4 100644
--- a/compel/arch/ppc64/src/lib/infect.c
+++ b/compel/arch/ppc64/src/lib/infect.c
@@ -11,15 +11,16 @@
#include "log.h"
#include "common/bug.h"
#include "common/page.h"
+#include "common/err.h"
#include "infect.h"
#include "infect-priv.h"
#ifndef NT_PPC_TM_SPR
-#define NT_PPC_TM_CGPR 0x108 /* TM checkpointed GPR Registers */
-#define NT_PPC_TM_CFPR 0x109 /* TM checkpointed FPR Registers */
-#define NT_PPC_TM_CVMX 0x10a /* TM checkpointed VMX Registers */
-#define NT_PPC_TM_CVSX 0x10b /* TM checkpointed VSX Registers */
-#define NT_PPC_TM_SPR 0x10c /* TM Special Purpose Registers */
+#define NT_PPC_TM_CGPR 0x108 /* TM checkpointed GPR Registers */
+#define NT_PPC_TM_CFPR 0x109 /* TM checkpointed FPR Registers */
+#define NT_PPC_TM_CVMX 0x10a /* TM checkpointed VMX Registers */
+#define NT_PPC_TM_CVSX 0x10b /* TM checkpointed VSX Registers */
+#define NT_PPC_TM_SPR 0x10c /* TM Special Purpose Registers */
#endif
unsigned __page_size = 0;
@@ -29,8 +30,8 @@ unsigned __page_shift = 0;
* Injected syscall instruction
*/
const uint32_t code_syscall[] = {
- 0x44000002, /* sc */
- 0x0fe00000 /* twi 31,0,0 */
+ 0x44000002, /* sc */
+ 0x0fe00000 /* twi 31,0,0 */
};
static inline __always_unused void __check_code_syscall(void)
@@ -43,14 +44,14 @@ static void prep_gp_regs(mcontext_t *dst, user_regs_struct_t *regs)
{
memcpy(dst->gp_regs, regs->gpr, sizeof(regs->gpr));
- dst->gp_regs[PT_NIP] = regs->nip;
- dst->gp_regs[PT_MSR] = regs->msr;
- dst->gp_regs[PT_ORIG_R3] = regs->orig_gpr3;
- dst->gp_regs[PT_CTR] = regs->ctr;
- dst->gp_regs[PT_LNK] = regs->link;
- dst->gp_regs[PT_XER] = regs->xer;
- dst->gp_regs[PT_CCR] = regs->ccr;
- dst->gp_regs[PT_TRAP] = regs->trap;
+ dst->gp_regs[PT_NIP] = regs->nip;
+ dst->gp_regs[PT_MSR] = regs->msr;
+ dst->gp_regs[PT_ORIG_R3] = regs->orig_gpr3;
+ dst->gp_regs[PT_CTR] = regs->ctr;
+ dst->gp_regs[PT_LNK] = regs->link;
+ dst->gp_regs[PT_XER] = regs->xer;
+ dst->gp_regs[PT_CCR] = regs->ccr;
+ dst->gp_regs[PT_TRAP] = regs->trap;
}
static void put_fpu_regs(mcontext_t *mc, uint64_t *fpregs)
@@ -74,9 +75,7 @@ static void put_vsx_regs(mcontext_t *mc, uint64_t *vsxregs)
memcpy((uint64_t *)(mc->v_regs + 1), vsxregs, sizeof(*vsxregs) * NVSXREG);
}
-int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe,
- user_regs_struct_t *regs,
- user_fpregs_struct_t *fpregs)
+int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
{
mcontext_t *dst_tc = &sigframe->uc_transact.uc_mcontext;
mcontext_t *dst = &sigframe->uc.uc_mcontext;
@@ -134,14 +133,12 @@ static void update_vregs(mcontext_t *lcontext, mcontext_t *rcontext)
uint64_t offset = (uint64_t)(lcontext->v_regs) - (uint64_t)lcontext;
lcontext->v_regs = (vrregset_t *)((uint64_t)rcontext + offset);
- pr_debug("Updated v_regs:%llx (rcontext:%llx)\n",
- (unsigned long long)lcontext->v_regs,
+ pr_debug("Updated v_regs:%llx (rcontext:%llx)\n", (unsigned long long)lcontext->v_regs,
(unsigned long long)rcontext);
}
}
-int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *frame,
- struct rt_sigframe *rframe)
+int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *frame, struct rt_sigframe *rframe)
{
uint64_t msr = frame->uc.uc_mcontext.gp_regs[PT_MSR];
@@ -155,9 +152,8 @@ int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *frame,
/* Updating the transactional state address if any */
if (frame->uc.uc_link) {
- update_vregs(&frame->uc_transact.uc_mcontext,
- &rframe->uc_transact.uc_mcontext);
- frame->uc.uc_link = &rframe->uc_transact;
+ update_vregs(&frame->uc_transact.uc_mcontext, &rframe->uc_transact.uc_mcontext);
+ frame->uc.uc_link = &rframe->uc_transact;
}
return 0;
@@ -214,7 +210,7 @@ static int get_fpu_regs(pid_t pid, user_fpregs_struct_t *fp)
static int get_altivec_regs(pid_t pid, user_fpregs_struct_t *fp)
{
- if (ptrace(PTRACE_GETVRREGS, pid, 0, (void*)&fp->vrregs) < 0) {
+ if (ptrace(PTRACE_GETVRREGS, pid, 0, (void *)&fp->vrregs) < 0) {
/* PTRACE_GETVRREGS returns EIO if Altivec is not supported.
* This should not happen if msr_vec is set. */
if (errno != EIO) {
@@ -222,8 +218,7 @@ static int get_altivec_regs(pid_t pid, user_fpregs_struct_t *fp)
return -1;
}
pr_debug("Altivec not supported\n");
- }
- else {
+ } else {
pr_debug("Dumping Altivec registers\n");
fp->flags |= USER_FPREGS_FL_ALTIVEC;
}
@@ -241,7 +236,7 @@ static int get_altivec_regs(pid_t pid, user_fpregs_struct_t *fp)
*/
static int get_vsx_regs(pid_t pid, user_fpregs_struct_t *fp)
{
- if (ptrace(PTRACE_GETVSRREGS, pid, 0, (void*)fp->vsxregs) < 0) {
+ if (ptrace(PTRACE_GETVSRREGS, pid, 0, (void *)fp->vsxregs) < 0) {
/*
* EIO is returned in the case PTRACE_GETVRREGS is not
* supported.
@@ -251,8 +246,7 @@ static int get_vsx_regs(pid_t pid, user_fpregs_struct_t *fp)
return -1;
}
pr_debug("VSX register's dump not supported.\n");
- }
- else {
+ } else {
pr_debug("Dumping VSX registers\n");
fp->flags |= USER_FPREGS_FL_VSX;
}
@@ -265,22 +259,23 @@ static int get_tm_regs(pid_t pid, user_fpregs_struct_t *fpregs)
pr_debug("Dumping TM registers\n");
-#define TM_REQUIRED 0
-#define TM_OPTIONAL 1
-#define PTRACE_GET_TM(s,n,c,u) do { \
- iov.iov_base = &s; \
- iov.iov_len = sizeof(s); \
- if (ptrace(PTRACE_GETREGSET, pid, c, &iov)) { \
- if (!u || errno != EIO) { \
- pr_perror("Couldn't get TM "n); \
- pr_err("Your kernel seems to not support the " \
- "new TM ptrace API (>= 4.8)\n"); \
- goto out_free; \
- } \
- pr_debug("TM "n" not supported.\n"); \
- iov.iov_base = NULL; \
- } \
-} while(0)
+#define TM_REQUIRED 0
+#define TM_OPTIONAL 1
+#define PTRACE_GET_TM(s, n, c, u) \
+ do { \
+ iov.iov_base = &s; \
+ iov.iov_len = sizeof(s); \
+ if (ptrace(PTRACE_GETREGSET, pid, c, &iov)) { \
+ if (!u || errno != EIO) { \
+ pr_perror("Couldn't get TM " n); \
+ pr_err("Your kernel seems to not support the " \
+ "new TM ptrace API (>= 4.8)\n"); \
+ goto out_free; \
+ } \
+ pr_debug("TM " n " not supported.\n"); \
+ iov.iov_base = NULL; \
+ } \
+ } while (0)
/* Get special registers */
PTRACE_GET_TM(fpregs->tm.tm_spr_regs, "SPR", NT_PPC_TM_SPR, TM_REQUIRED);
@@ -306,37 +301,61 @@ static int get_tm_regs(pid_t pid, user_fpregs_struct_t *fpregs)
return 0;
out_free:
- return -1; /* still failing the checkpoint */
+ return -1; /* still failing the checkpoint */
}
-static int __get_task_regs(pid_t pid, user_regs_struct_t *regs,
- user_fpregs_struct_t *fpregs)
+/*
+ * This is inspired by kernel function check_syscall_restart in
+ * arch/powerpc/kernel/signal.c
+ */
+
+#ifndef TRAP
+#define TRAP(r) ((r).trap & ~0xF)
+#endif
+
+/*
+ * Return true when the trap value in @regs, as reduced by TRAP(), equals
+ * 0x3000 - the value this file treats as an entry through scv.
+ */
+static bool trap_is_scv(user_regs_struct_t *regs)
+{
+ return TRAP(*regs) == 0x3000;
+}
+
+/*
+ * Return true when @regs describes a system call entry: either the scv case
+ * recognised by trap_is_scv(), or a TRAP() value of 0x0C00.
+ */
+static bool trap_is_syscall(user_regs_struct_t *regs)
+{
+ return trap_is_scv(regs) || TRAP(*regs) == 0x0C00;
+}
+
+/*
+ * Adjust the saved registers of a task that was stopped while in a system
+ * call, so that on resume the call is either issued again or seen as
+ * interrupted.
+ *
+ * @pid:  task id, used only in the warning message.
+ * @regs: saved general-purpose registers; gpr[3] and nip may be rewritten.
+ */
+static void handle_syscall(pid_t pid, user_regs_struct_t *regs)
+{
+ unsigned long ret = regs->gpr[3];
+
+ if (trap_is_scv(regs)) {
+ /* scv entry: only an error value in gpr[3] needs handling. */
+ if (!IS_ERR_VALUE(ret))
+ return;
+ /* Negate so the value can be matched against the positive ERESTART* cases. */
+ ret = -ret;
+ } else if (!(regs->ccr & 0x10000000)) {
+ /*
+ * Non-scv entry with bit 0x10000000 of ccr clear: nothing to do.
+ * NOTE(review): presumably this bit is CR0[SO], set by the kernel
+ * when the syscall failed - confirm.
+ */
+ return;
+ }
+
+ /* Restart or interrupt the system call */
+ switch (ret) {
+ case ERESTARTNOHAND:
+ case ERESTARTSYS:
+ case ERESTARTNOINTR:
+ /* Put the original gpr[3] back and move nip back by 4 so the call is issued again. */
+ regs->gpr[3] = regs->orig_gpr3;
+ regs->nip -= 4;
+ break;
+ case ERESTART_RESTARTBLOCK:
+ /* Not restarted: hand EINTR to the task instead, negated in the scv case to match the sign handling above. */
+ pr_warn("Will restore %d with interrupted system call\n", pid);
+ regs->gpr[3] = trap_is_scv(regs) ? -EINTR : EINTR;
+ break;
+ }
+}
+
+static int __get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
{
pr_info("Dumping GP/FPU registers for %d\n", pid);
- /*
- * This is inspired by kernel function check_syscall_restart in
- * arch/powerpc/kernel/signal.c
- */
-#ifndef TRAP
-#define TRAP(r) ((r).trap & ~0xF)
-#endif
-
- if (TRAP(*regs) == 0x0C00 && regs->ccr & 0x10000000) {
- /* Restart the system call */
- switch (regs->gpr[3]) {
- case ERESTARTNOHAND:
- case ERESTARTSYS:
- case ERESTARTNOINTR:
- regs->gpr[3] = regs->orig_gpr3;
- regs->nip -= 4;
- break;
- case ERESTART_RESTARTBLOCK:
- regs->gpr[0] = __NR_restart_syscall;
- regs->nip -= 4;
- break;
- }
- }
+ if (trap_is_syscall(regs))
+ handle_syscall(pid, regs);
/* Resetting trap since we are now coming from user space. */
regs->trap = 0;
@@ -349,10 +368,8 @@ static int __get_task_regs(pid_t pid, user_regs_struct_t *regs,
* impossible) or suspended (easy to get).
*/
if (MSR_TM_ACTIVE(regs->msr)) {
- pr_debug("Task %d has %s TM operation at 0x%lx\n",
- pid,
- (regs->msr & MSR_TMS) ? "a suspended" : "an active",
- regs->nip);
+ pr_debug("Task %d has %s TM operation at 0x%lx\n", pid,
+ (regs->msr & MSR_TMS) ? "a suspended" : "an active", regs->nip);
if (get_tm_regs(pid, fpregs))
return -1;
fpregs->flags = USER_FPREGS_FL_TM;
@@ -374,26 +391,48 @@ static int __get_task_regs(pid_t pid, user_regs_struct_t *regs,
return 0;
}
-int get_task_regs(pid_t pid, user_regs_struct_t *regs, save_regs_t save,
- void *arg, __maybe_unused unsigned long flags)
+int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs, save_regs_t save,
+ void *arg, __maybe_unused unsigned long flags)
{
- user_fpregs_struct_t fpregs;
int ret;
- ret = __get_task_regs(pid, regs, &fpregs);
+ ret = __get_task_regs(pid, regs, fpregs);
if (ret)
return ret;
- return save(arg, regs, &fpregs);
+ return save(pid, arg, regs, fpregs);
}
-int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret,
- unsigned long arg1,
- unsigned long arg2,
- unsigned long arg3,
- unsigned long arg4,
- unsigned long arg5,
- unsigned long arg6)
+/*
+ * Write the extended register sets held in @ext_regs back into task @pid
+ * with ptrace.
+ *
+ * Only the sets whose USER_FPREGS_FL_* bit is set in @ext_regs->flags are
+ * written. Every requested set is attempted even if an earlier one fails.
+ *
+ * Returns 0 on success, -1 if any ptrace request failed.
+ */
+int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
+{
+	int ret = 0;
+
+	pr_info("Restoring GP/FPU registers for %d\n", pid);
+
+	/* XXX: should restore TM registers somehow? */
+	if (ext_regs->flags & USER_FPREGS_FL_FP) {
+		if (ptrace(PTRACE_SETFPREGS, pid, 0, (void *)&ext_regs->fpregs) < 0) {
+			pr_perror("Couldn't set floating-point registers");
+			ret = -1;
+		}
+	}
+
+	if (ext_regs->flags & USER_FPREGS_FL_ALTIVEC) {
+		if (ptrace(PTRACE_SETVRREGS, pid, 0, (void *)&ext_regs->vrregs) < 0) {
+			pr_perror("Couldn't set Altivec registers");
+			ret = -1;
+		}
+	}
+
+	/*
+	 * VSX has its own flag: get_vsx_regs() sets USER_FPREGS_FL_VSX only when
+	 * PTRACE_GETVSRREGS succeeded, so vsxregs holds dumped data only then.
+	 * Keying this on the Altivec flag would try to write registers that were
+	 * never read and fail the whole restore.
+	 */
+	if (ext_regs->flags & USER_FPREGS_FL_VSX) {
+		if (ptrace(PTRACE_SETVSRREGS, pid, 0, (void *)ext_regs->vsxregs) < 0) {
+			pr_perror("Couldn't set VSX registers");
+			ret = -1;
+		}
+	}
+
+	return ret;
+}
+
+int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6)
{
user_regs_struct_t regs = ctl->orig.regs;
int err;
@@ -406,21 +445,18 @@ int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret,
regs.gpr[7] = arg5;
regs.gpr[8] = arg6;
- err = compel_execute_syscall(ctl, &regs, (char*)code_syscall);
+ err = compel_execute_syscall(ctl, &regs, (char *)code_syscall);
*ret = regs.gpr[3];
return err;
}
-void *remote_mmap(struct parasite_ctl *ctl,
- void *addr, size_t length, int prot,
- int flags, int fd, off_t offset)
+void *remote_mmap(struct parasite_ctl *ctl, void *addr, size_t length, int prot, int flags, int fd, off_t offset)
{
long map = 0;
int err;
- err = compel_syscall(ctl, __NR_mmap, &map,
- (unsigned long)addr, length, prot, flags, fd, offset);
+ err = compel_syscall(ctl, __NR_mmap, &map, (unsigned long)addr, length, prot, flags, fd, offset);
if (err < 0 || (long)map < 0)
map = 0;
@@ -430,13 +466,13 @@ void *remote_mmap(struct parasite_ctl *ctl,
void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs)
{
/*
- * OpenPOWER ABI requires that r12 is set to the calling function addressi
+ * OpenPOWER ABI requires that r12 is set to the calling function address
* to compute the TOC pointer.
*/
regs->gpr[12] = new_ip;
regs->nip = new_ip;
if (stack)
- regs->gpr[1] = (unsigned long) stack;
+ regs->gpr[1] = (unsigned long)stack - STACK_FRAME_MIN_SIZE;
regs->trap = 0;
}
@@ -453,9 +489,7 @@ int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s)
long ret;
int err;
- err = compel_syscall(ctl, __NR_sigaltstack,
- &ret, 0, (unsigned long)&s->uc.uc_stack,
- 0, 0, 0, 0);
+ err = compel_syscall(ctl, __NR_sigaltstack, &ret, 0, (unsigned long)&s->uc.uc_stack, 0, 0, 0, 0);
return err ? err : ret;
}
@@ -464,7 +498,7 @@ int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s)
*
* NOTE: 32bit tasks are not supported.
*/
-#define TASK_SIZE_64TB (0x0000400000000000UL)
+#define TASK_SIZE_64TB (0x0000400000000000UL)
#define TASK_SIZE_512TB (0x0002000000000000UL)
#define TASK_SIZE_MIN TASK_SIZE_64TB
diff --git a/compel/arch/riscv64/plugins/include/asm/prologue.h b/compel/arch/riscv64/plugins/include/asm/prologue.h
new file mode 100644
index 000000000..5c22b7b06
--- /dev/null
+++ b/compel/arch/riscv64/plugins/include/asm/prologue.h
@@ -0,0 +1,35 @@
+#ifndef __ASM_PROLOGUE_H__
+#define __ASM_PROLOGUE_H__
+
+#ifndef __ASSEMBLY__
+
+#include
+#include
+#include
+
+#include
+
+#define sys_recv(sockfd, ubuf, size, flags) sys_recvfrom(sockfd, ubuf, size, flags, NULL, NULL)
+
+typedef struct prologue_init_args {
+ struct sockaddr_un ctl_sock_addr;
+ unsigned int ctl_sock_addr_len;
+
+ unsigned int arg_s;
+ void *arg_p;
+
+ void *sigframe;
+} prologue_init_args_t;
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Reserve enough space for sigframe.
+ *
+ * FIXME: this should rather be taken from the sigframe header.
+ */
+#define PROLOGUE_SGFRAME_SIZE 4096
+
+#define PROLOGUE_INIT_ARGS_SIZE 1024
+
+#endif /* __ASM_PROLOGUE_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/include/asm/syscall-types.h b/compel/arch/riscv64/plugins/include/asm/syscall-types.h
new file mode 100644
index 000000000..b9740a9ee
--- /dev/null
+++ b/compel/arch/riscv64/plugins/include/asm/syscall-types.h
@@ -0,0 +1,28 @@
+#ifndef COMPEL_ARCH_SYSCALL_TYPES_H__
+#define COMPEL_ARCH_SYSCALL_TYPES_H__
+
+#define SA_RESTORER 0x04000000
+
+typedef void rt_signalfn_t(int, siginfo_t *, void *);
+typedef rt_signalfn_t *rt_sighandler_t;
+
+typedef void rt_restorefn_t(void);
+typedef rt_restorefn_t *rt_sigrestore_t;
+
+#define _KNSIG 64 // number of signals
+#define _NSIG_BPW 64 // number of signals per word
+
+#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW)
+
+typedef struct {
+ unsigned long sig[_KNSIG_WORDS];
+} k_rtsigset_t;
+
+typedef struct {
+ rt_sighandler_t rt_sa_handler;
+ unsigned long rt_sa_flags;
+ rt_sigrestore_t rt_sa_restorer;
+ k_rtsigset_t rt_sa_mask;
+} rt_sigaction_t;
+
+#endif /* COMPEL_ARCH_SYSCALL_TYPES_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/include/features.h b/compel/arch/riscv64/plugins/include/features.h
new file mode 100644
index 000000000..274cee52a
--- /dev/null
+++ b/compel/arch/riscv64/plugins/include/features.h
@@ -0,0 +1,4 @@
+#ifndef __COMPEL_ARCH_FEATURES_H
+#define __COMPEL_ARCH_FEATURES_H
+
+#endif /* __COMPEL_ARCH_FEATURES_H */
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/std/parasite-head.S b/compel/arch/riscv64/plugins/std/parasite-head.S
new file mode 100644
index 000000000..3e9d272e3
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/parasite-head.S
@@ -0,0 +1,7 @@
+#include "common/asm/linkage.h"
+
+ .section .head.text, "ax"
+ENTRY(__export_parasite_head_start)
+ jal parasite_service
+ ebreak
+END(__export_parasite_head_start)
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/std/syscalls/Makefile.syscalls b/compel/arch/riscv64/plugins/std/syscalls/Makefile.syscalls
new file mode 100644
index 000000000..5af35bcb4
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/syscalls/Makefile.syscalls
@@ -0,0 +1,59 @@
+ccflags-y += -iquote $(PLUGIN_ARCH_DIR)/std/syscalls/
+asflags-y += -iquote $(PLUGIN_ARCH_DIR)/std/syscalls/
+
+sys-types := $(obj)/include/uapi/std/syscall-types.h
+sys-codes := $(obj)/include/uapi/std/syscall-codes.h
+sys-proto := $(obj)/include/uapi/std/syscall.h
+
+sys-def := $(PLUGIN_ARCH_DIR)/std/syscalls/syscall.def
+sys-asm-common-name := std/syscalls/syscall-common.S
+sys-asm-common := $(PLUGIN_ARCH_DIR)/$(sys-asm-common-name)
+sys-asm-types := $(obj)/include/uapi/std/asm/syscall-types.h
+sys-exec-tbl = $(PLUGIN_ARCH_DIR)/std/sys-exec-tbl.c
+
+sys-gen := $(PLUGIN_ARCH_DIR)/std/syscalls/gen-syscalls.pl
+sys-gen-tbl := $(PLUGIN_ARCH_DIR)/std/syscalls/gen-sys-exec-tbl.pl
+
+sys-asm := ./$(PLUGIN_ARCH_DIR)/std/syscalls/syscalls.S
+std-lib-y += $(sys-asm:.S=).o
+
+ifeq ($(ARCH),arm)
+arch_bits := 32
+else
+arch_bits := 64
+endif
+
+sys-exec-tbl := sys-exec-tbl.c
+
+$(sys-asm) $(sys-types) $(sys-codes) $(sys-proto): $(sys-gen) $(sys-def) $(sys-asm-common) $(sys-asm-types)
+ $(E) " GEN " $@
+ $(Q) perl \
+ $(sys-gen) \
+ $(sys-def) \
+ $(sys-codes) \
+ $(sys-proto) \
+ $(sys-asm) \
+ $(sys-asm-common-name) \
+ $(sys-types) \
+ $(arch_bits)
+
+$(sys-asm:.S=).o: $(sys-asm)
+
+$(sys-exec-tbl): $(sys-gen-tbl) $(sys-def)
+ $(E) " GEN " $@
+ $(Q) perl \
+ $(sys-gen-tbl) \
+ $(sys-def) \
+ $(sys-exec-tbl) \
+ $(arch_bits)
+
+$(sys-asm-types): $(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h
+ $(call msg-gen, $@)
+ $(Q) ln -s ../../../../../../$(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h $(sys-asm-types)
+ $(Q) ln -s ../../../../../$(PLUGIN_ARCH_DIR)/std/syscalls/syscall-aux.S $(obj)/include/uapi/std/syscall-aux.S
+ $(Q) ln -s ../../../../../$(PLUGIN_ARCH_DIR)/std/syscalls/syscall-aux.h $(obj)/include/uapi/std/syscall-aux.h
+
+std-headers-deps += $(sys-asm) $(sys-codes) $(sys-proto) $(sys-asm-types) $(sys-codes)
+mrproper-y += $(std-headers-deps)
+mrproper-y += $(obj)/include/uapi/std/syscall-aux.S
+mrproper-y += $(obj)/include/uapi/std/syscall-aux.h
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/std/syscalls/gen-sys-exec-tbl.pl b/compel/arch/riscv64/plugins/std/syscalls/gen-sys-exec-tbl.pl
new file mode 100755
index 000000000..61a807eb6
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/syscalls/gen-sys-exec-tbl.pl
@@ -0,0 +1,43 @@
+#!/usr/bin/perl
+
+# Generate the sc_exec_table[] source from a syscall definition file.
+#
+# Usage: gen-sys-exec-tbl.pl <syscall.def> <output.c> <bits>
+#
+# <bits> selects which number column of the definition file is used: it is
+# appended to "code" to form the name of the capture group that is read
+# (code64 or code32). Entries whose selected number is "!" are left out.
+
+use strict;
+use warnings;
+
+my $in = $ARGV[0];
+my $tblout = $ARGV[1];
+my $bits = $ARGV[2];
+
+my $code = "code$bits";
+
+open TBLOUT, ">", $tblout or die $!;
+open IN, "<", $in or die $!;
+
+print TBLOUT "/* Autogenerated, don't edit */\n";
+print TBLOUT "static struct syscall_exec_desc sc_exec_table[] = {\n";
+
+for (<IN>) {
+	# Any line containing '#' is treated as a comment and skipped.
+	if ($_ =~ /\#/) {
+		next;
+	}
+
+	my $sys_name;
+	my $sys_num;
+
+	# Two accepted layouts, tried in this order:
+	#   name alias code64 code32 (args)
+	#   name       code64 code32 (args)
+	if (/(?<name>\S+)\s+(?<alias>\S+)\s+(?<code64>\d+|\!)\s+(?<code32>(?:\d+|\!))\s+\((?<args>.+)\)/) {
+		$sys_name = $+{alias};
+	} elsif (/(?<name>\S+)\s+(?<code64>\d+|\!)\s+(?<code32>(?:\d+|\!))\s+\((?<args>.+)\)/) {
+		$sys_name = $+{name};
+	} else {
+		# Do not leave a half-written table behind.
+		unlink $tblout;
+		die "Invalid syscall definition file: invalid entry $_\n";
+	}
+
+	$sys_num = $+{$code};
+
+	if ($sys_num ne "!") {
+		print TBLOUT "SYSCALL($sys_name, $sys_num)\n";
+	}
+}
+
+print TBLOUT " { }, /* terminator */";
+print TBLOUT "};"
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/std/syscalls/gen-syscalls.pl b/compel/arch/riscv64/plugins/std/syscalls/gen-syscalls.pl
new file mode 100755
index 000000000..a53f1962f
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/syscalls/gen-syscalls.pl
@@ -0,0 +1,99 @@
+#!/usr/bin/perl
+
+# Generate the syscall codes header, the prototypes header and the assembly
+# stubs from a syscall definition file.
+#
+# Usage: gen-syscalls.pl <syscall.def> <codes.h> <protos.h> <syscalls.S>
+#                        <asm-common-name> <types.h> <bits>
+#
+# <bits> selects which number column of the definition file is used: it is
+# appended to "code" to form the name of the capture group that is read
+# (code64 or code32). Entries whose selected number is "!" get a prototype
+# only, and make the generated files include the syscall-aux sources.
+
+use strict;
+use warnings;
+
+my $in = $ARGV[0];
+my $codesout = $ARGV[1];
+my $codes = $ARGV[1];
+# Turn the build-tree output path into the path used in #include lines.
+$codes =~ s/.*include\/uapi\//compel\/plugins\//g;
+my $protosout = $ARGV[2];
+my $protos = $ARGV[2];
+$protos =~ s/.*include\/uapi\//compel\/plugins\//g;
+my $asmout = $ARGV[3];
+my $asmcommon = $ARGV[4];
+my $prototypes = $ARGV[5];
+$prototypes =~ s/.*include\/uapi\//compel\/plugins\//g;
+my $bits = $ARGV[6];
+
+# Include-guard names: the include path with '.', '-' and '/' replaced by '_'.
+my $codesdef = $codes;
+$codesdef =~ tr/.\-\//_/;
+my $protosdef = $protos;
+$protosdef =~ tr/.\-\//_/;
+my $code = "code$bits";
+my $need_aux = 0;
+
+unlink $codesout;
+unlink $protosout;
+unlink $asmout;
+
+open CODESOUT, ">", $codesout or die $!;
+open PROTOSOUT, ">", $protosout or die $!;
+open ASMOUT, ">", $asmout or die $!;
+open IN, "<", $in or die $!;
+
+print CODESOUT <<"END";
+/* Autogenerated, don't edit */
+#ifndef $codesdef
+#define $codesdef
+END
+
+print PROTOSOUT <<"END";
+/* Autogenerated, don't edit */
+#ifndef $protosdef
+#define $protosdef
+#include <$prototypes>
+#include <$codes>
+END
+
+print ASMOUT <<"END";
+/* Autogenerated, don't edit */
+#include <$codes>
+#include "$asmcommon"
+END
+
+
+for (<IN>) {
+	# Any line containing '#' is treated as a comment and skipped.
+	if ($_ =~ /\#/) {
+		next;
+	}
+
+	my $code_macro;
+	my $sys_macro;
+	my $sys_name;
+
+	# Two accepted layouts, tried in this order:
+	#   name alias code64 code32 (args)
+	#   name       code64 code32 (args)
+	if (/(?<name>\S+)\s+(?<alias>\S+)\s+(?<code64>\d+|\!)\s+(?<code32>(?:\d+|\!))\s+\((?<args>.+)\)/) {
+		$code_macro = "__NR_$+{name}";
+		$sys_macro = "SYS_$+{name}";
+		$sys_name = "sys_$+{alias}";
+	} elsif (/(?<name>\S+)\s+(?<code64>\d+|\!)\s+(?<code32>(?:\d+|\!))\s+\((?<args>.+)\)/) {
+		$code_macro = "__NR_$+{name}";
+		$sys_macro = "SYS_$+{name}";
+		$sys_name = "sys_$+{name}";
+	} else {
+		# Do not leave half-written outputs behind.
+		unlink $codesout;
+		unlink $protosout;
+		unlink $asmout;
+
+		die "Invalid syscall definition file: invalid entry $_\n";
+	}
+
+	if ($+{$code} ne "!") {
+		print CODESOUT "#ifndef $code_macro\n#define $code_macro $+{$code}\n#endif\n";
+		print CODESOUT "#ifndef $sys_macro\n#define $sys_macro $code_macro\n#endif\n";
+		print ASMOUT "syscall $sys_name, $code_macro\n";
+
+	} else {
+		# No number for this width: the entry point has to come from syscall-aux.
+		$need_aux = 1;
+	}
+
+	print PROTOSOUT "extern long $sys_name($+{args});\n";
+}
+
+if ($need_aux == 1) {
+	# NOTE(review): include targets reconstructed from the Makefile, which
+	# links syscall-aux.S/.h into include/uapi/std/ - confirm against upstream.
+	print ASMOUT "#include <compel/plugins/std/syscall-aux.S>\n";
+	print CODESOUT "#include <compel/plugins/std/syscall-aux.h>\n";
+}
+
+print CODESOUT "#endif /* $codesdef */";
+print PROTOSOUT "#endif /* $protosdef */";
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.S b/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.S
new file mode 100644
index 000000000..04160b7ac
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.S
@@ -0,0 +1,37 @@
+/**
+ * This source contains emulation of syscalls
+ * that are not implemented in the riscv64 Linux kernel
+ */
+
+ENTRY(sys_open)
+ add a3, x0, a2
+ add a2, x0, a1
+ add a1, x0, a0
+ addi a0, x0, -100
+ j sys_openat
+END(sys_open)
+
+
+ENTRY(sys_mkdir)
+ add a3,x0, a2
+ add a2, x0, a1
+ add a1, x0, a0
+ addi a0, x0, -100
+ j sys_mkdirat
+END(sys_mkdir)
+
+
+ENTRY(sys_rmdir)
+ addi a2, x0, 0x200 // flags = AT_REMOVEDIR
+ add a1, x0, a0
+ addi a0, x0, -100
+ j sys_unlinkat
+END(sys_rmdir)
+
+
+ENTRY(sys_unlink)
+ addi a2, x0, 0 // flags = 0
+ add a1, x0, a0
+ addi a0, x0, -100
+ j sys_unlinkat
+END(sys_unlink)
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.h b/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.h
new file mode 100644
index 000000000..881765bbb
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.h
@@ -0,0 +1,3 @@
+#ifndef __NR_openat
+#define __NR_openat 56
+#endif
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/std/syscalls/syscall-common.S b/compel/arch/riscv64/plugins/std/syscalls/syscall-common.S
new file mode 100644
index 000000000..fdef3b47a
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/syscalls/syscall-common.S
@@ -0,0 +1,17 @@
+#include "common/asm/linkage.h"
+
+syscall_common:
+ ecall
+ ret
+
+.macro syscall name, nr
+ ENTRY(\name)
+ li a7, \nr
+ j syscall_common
+ END(\name)
+.endm
+
+ENTRY(__cr_restore_rt)
+ li a7, __NR_rt_sigreturn
+ ecall
+END(__cr_restore_rt)
\ No newline at end of file
diff --git a/compel/arch/riscv64/plugins/std/syscalls/syscall.def b/compel/arch/riscv64/plugins/std/syscalls/syscall.def
new file mode 100644
index 000000000..967f097f9
--- /dev/null
+++ b/compel/arch/riscv64/plugins/std/syscalls/syscall.def
@@ -0,0 +1,125 @@
+#
+# System calls table, please make sure the table consists of only the syscalls
+# really used somewhere in the project.
+#
+# The template is (name and arguments are optional if you need only __NR_x
+# defined, but no real entry point in syscalls lib).
+#
+# name/alias code64 code32 arguments
+# -----------------------------------------------------------------------
+#
+read 63 3 (int fd, void *buf, unsigned long count)
+write 64 4 (int fd, const void *buf, unsigned long count)
+open ! 5 (const char *filename, unsigned long flags, unsigned long mode)
+close 57 6 (int fd)
+lseek 62 19 (int fd, unsigned long offset, unsigned long origin)
+mmap 222 ! (void *addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long offset)
+mprotect 226 125 (const void *addr, unsigned long len, unsigned long prot)
+munmap 215 91 (void *addr, unsigned long len)
+brk 214 45 (void *addr)
+rt_sigaction sigaction 134 174 (int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact, size_t sigsetsize)
+rt_sigprocmask sigprocmask 135 175 (int how, k_rtsigset_t *set, k_rtsigset_t *old, size_t sigsetsize)
+rt_sigreturn 139 173 (void)
+ioctl 29 54 (unsigned int fd, unsigned int cmd, unsigned long arg)
+pread64 67 180 (unsigned int fd, char *buf, size_t count, loff_t pos)
+ptrace 117 26 (long request, pid_t pid, void *addr, void *data)
+mremap 216 163 (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flag, unsigned long new_addr)
+mincore 232 219 (void *addr, unsigned long size, unsigned char *vec)
+madvise 233 220 (unsigned long start, size_t len, int behavior)
+shmat 196 305 (int shmid, void *shmaddr, int shmflag)
+pause 1061 29 (void)
+nanosleep 101 162 (struct timespec *req, struct timespec *rem)
+getitimer 102 105 (int which, const struct itimerval *val)
+setitimer 103 104 (int which, const struct itimerval *val, struct itimerval *old)
+getpid 172 20 (void)
+socket 198 281 (int domain, int type, int protocol)
+connect 203 283 (int sockfd, struct sockaddr *addr, int addrlen)
+sendto 206 290 (int sockfd, void *buff, size_t len, unsigned int flags, struct sockaddr *addr, int addr_len)
+recvfrom 207 292 (int sockfd, void *ubuf, size_t size, unsigned int flags, struct sockaddr *addr, int *addr_len)
+sendmsg 211 296 (int sockfd, const struct msghdr *msg, int flags)
+recvmsg 212 297 (int sockfd, struct msghdr *msg, int flags)
+shutdown 210 293 (int sockfd, int how)
+bind 200 282 (int sockfd, const struct sockaddr *addr, int addrlen)
+setsockopt 208 294 (int sockfd, int level, int optname, const void *optval, socklen_t optlen)
+getsockopt 209 295 (int sockfd, int level, int optname, const void *optval, socklen_t *optlen)
+clone 220 120 (unsigned long flags, void *child_stack, void *parent_tid, unsigned long newtls, void *child_tid)
+exit 93 1 (unsigned long error_code)
+wait4 260 114 (int pid, int *status, int options, struct rusage *ru)
+waitid 95 280 (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
+kill 129 37 (long pid, int sig)
+fcntl 25 55 (int fd, int type, long arg)
+flock 32 143 (int fd, unsigned long cmd)
+mkdir ! 39 (const char *name, int mode)
+rmdir ! 40 (const char *name)
+unlink ! 10 (char *pathname)
+readlinkat 78 332 (int fd, const char *path, char *buf, int bufsize)
+umask 166 60 (int mask)
+getgroups 158 205 (int gsize, unsigned int *groups)
+setgroups 159 206 (int gsize, unsigned int *groups)
+setresuid 147 164 (int uid, int euid, int suid)
+getresuid 148 165 (int *uid, int *euid, int *suid)
+setresgid 149 170 (int gid, int egid, int sgid)
+getresgid 150 171 (int *gid, int *egid, int *sgid)
+getpgid 155 132 (pid_t pid)
+setfsuid 151 138 (int fsuid)
+setfsgid 152 139 (int fsgid)
+getsid 156 147 (void)
+capget 90 184 (struct cap_header *h, struct cap_data *d)
+capset 91 185 (struct cap_header *h, struct cap_data *d)
+rt_sigqueueinfo 138 178 (pid_t pid, int sig, siginfo_t *info)
+setpriority 140 97 (int which, int who, int nice)
+sched_setscheduler 119 156 (int pid, int policy, struct sched_param *p)
+sigaltstack 132 186 (const void *uss, void *uoss)
+personality 92 136 (unsigned int personality)
+prctl 167 172 (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5)
+arch_prctl ! 17 (int option, unsigned long addr)
+setrlimit 164 75 (int resource, struct krlimit *rlim)
+mount 40 21 (char *dev_nmae, char *dir_name, char *type, unsigned long flags, void *data)
+umount2 39 52 (char *name, int flags)
+gettid 178 224 (void)
+futex 98 240 (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3)
+set_tid_address 96 256 (int *tid_addr)
+restart_syscall 128 0 (void)
+timer_create 107 257 (clockid_t which_clock, struct sigevent *timer_event_spec, kernel_timer_t *created_timer_id)
+timer_settime 110 258 (kernel_timer_t timer_id, int flags, const struct itimerspec *new_setting, struct itimerspec *old_setting)
+timer_gettime 108 259 (int timer_id, const struct itimerspec *setting)
+timer_getoverrun 109 260 (int timer_id)
+timer_delete 111 261 (kernel_timer_t timer_id)
+clock_gettime 113 263 (clockid_t which_clock, struct timespec *tp)
+exit_group 94 248 (int error_code)
+set_robust_list 99 338 (struct robust_list_head *head, size_t len)
+get_robust_list 100 339 (int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
+signalfd4 74 355 (int fd, k_rtsigset_t *mask, size_t sizemask, int flags)
+rt_tgsigqueueinfo 240 363 (pid_t tgid, pid_t pid, int sig, siginfo_t *info)
+vmsplice 75 343 (int fd, const struct iovec *iov, unsigned long nr_segs, unsigned int flags)
+timerfd_settime 86 353 (int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr)
+fanotify_init 262 367 (unsigned int flags, unsigned int event_f_flags)
+fanotify_mark 263 368 (int fanotify_fd, unsigned int flags, uint64_t mask, int dfd, const char *pathname)
+open_by_handle_at 265 371 (int mountdirfd, struct file_handle *handle, int flags)
+setns 268 375 (int fd, int nstype)
+kcmp 272 378 (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
+openat 56 322 (int dirfd, const char *pathname, int flags, mode_t mode)
+mkdirat 34 323 (int dirfd, const char *pathname, mode_t mode)
+unlinkat 35 328 (int dirfd, const char *pathname, int flags)
+memfd_create 279 385 (const char *name, unsigned int flags)
+io_setup 0 243 (unsigned nr_events, aio_context_t *ctx)
+io_submit 2 246 (aio_context_t ctx_id, long nr, struct iocb **iocbpp)
+io_getevents 4 245 (aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo)
+seccomp 277 383 (unsigned int op, unsigned int flags, const char *uargs)
+gettimeofday 169 78 (struct timeval *tv, struct timezone *tz)
+preadv_raw 69 361 (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h)
+userfaultfd 282 388 (int flags)
+fallocate 47 352 (int fd, int mode, loff_t offset, loff_t len)
+cacheflush ! 983042 (void *start, void *end, int flags)
+ppoll 73 336 (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize)
+fsopen 430 430 (char *fsname, unsigned int flags)
+fsconfig 431 431 (int fd, unsigned int cmd, const char *key, const char *value, int aux)
+fsmount 432 432 (int fd, unsigned int flags, unsigned int attr_flags)
+clone3 435 435 (struct clone_args *uargs, size_t size)
+pidfd_open 434 434 (pid_t pid, unsigned int flags)
+pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags)
+rseq 293 293 (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+move_mount 429 429 (int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, int flags)
+open_tree 428 428 (int dirfd, const char *pathname, unsigned int flags)
+openat2 437 437 (int dirfd, char *pathname, struct open_how *how, size_t size)
+membarrier 283 283 (int cmd, unsigned int flags, int cpu_id)
diff --git a/compel/arch/riscv64/scripts/compel-pack.lds.S b/compel/arch/riscv64/scripts/compel-pack.lds.S
new file mode 100644
index 000000000..a61235b44
--- /dev/null
+++ b/compel/arch/riscv64/scripts/compel-pack.lds.S
@@ -0,0 +1,32 @@
+OUTPUT_ARCH(riscv)
+EXTERN(__export_parasite_head_start)
+
+SECTIONS
+{
+ .crblob 0x0 : {
+ *(.head.text)
+ ASSERT(DEFINED(__export_parasite_head_start),
+ "Symbol __export_parasite_head_start is missing");
+ *(.text*)
+ . = ALIGN(32);
+ *(.data*)
+ . = ALIGN(32);
+ *(.rodata*)
+ . = ALIGN(32);
+ *(.bss*)
+ . = ALIGN(32);
+ *(.got*)
+ . = ALIGN(32);
+ *(.toc*)
+ . = ALIGN(32);
+ } =0x00000000,
+
+ /DISCARD/ : {
+ *(.debug*)
+ *(.comment*)
+ *(.note*)
+ *(.group*)
+ *(.eh_frame*)
+ *(*)
+ }
+}
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/cpu.c b/compel/arch/riscv64/src/lib/cpu.c
new file mode 100644
index 000000000..9a0291f70
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/cpu.c
@@ -0,0 +1,78 @@
+#include
+#include
+
+#include "compel-cpu.h"
+
+#include "common/bitops.h"
+
+#include "log.h"
+
+#undef LOG_PREFIX
+#define LOG_PREFIX "cpu: "
+
+static compel_cpuinfo_t rt_info;
+
+static void fetch_rt_cpuinfo(void)
+{
+ static bool rt_info_done = false;
+
+ if (!rt_info_done) {
+ compel_cpuid(&rt_info);
+ rt_info_done = true;
+ }
+}
+
+void compel_set_cpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+}
+void compel_clear_cpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+}
+int compel_test_cpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+ return 0;
+}
+int compel_test_fpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+ return 0;
+}
+int compel_cpuid(compel_cpuinfo_t *info)
+{
+ return 0;
+}
+
+bool compel_cpu_has_feature(unsigned int feature)
+{
+ fetch_rt_cpuinfo();
+ return compel_test_cpu_cap(&rt_info, feature);
+}
+
+bool compel_fpu_has_feature(unsigned int feature)
+{
+ fetch_rt_cpuinfo();
+ return compel_test_fpu_cap(&rt_info, feature);
+}
+
+uint32_t compel_fpu_feature_size(unsigned int feature)
+{
+ fetch_rt_cpuinfo();
+ return 0;
+}
+
+uint32_t compel_fpu_feature_offset(unsigned int feature)
+{
+ fetch_rt_cpuinfo();
+ return 0;
+}
+
+/*
+ * Make sure the cached runtime CPU info is populated, then forward @feature
+ * to compel_clear_cpu_cap() for that cached info.
+ */
+void compel_cpu_clear_feature(unsigned int feature)
+{
+	fetch_rt_cpuinfo();
+	/* Plain call: a void function must not return an expression in ISO C. */
+	compel_clear_cpu_cap(&rt_info, feature);
+}
+
+void compel_cpu_copy_cpuinfo(compel_cpuinfo_t *c)
+{
+ fetch_rt_cpuinfo();
+ memcpy(c, &rt_info, sizeof(rt_info));
+}
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/handle-elf-host.c b/compel/arch/riscv64/src/lib/handle-elf-host.c
new file mode 120000
index 000000000..fe4611886
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/handle-elf-host.c
@@ -0,0 +1 @@
+handle-elf.c
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/handle-elf.c b/compel/arch/riscv64/src/lib/handle-elf.c
new file mode 100644
index 000000000..22420bc78
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/handle-elf.c
@@ -0,0 +1,32 @@
+#include
+#include
+
+#include "handle-elf.h"
+#include "piegen.h"
+#include "log.h"
+
+static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = {
+ 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const unsigned char __maybe_unused elf_ident_64_be[EI_NIDENT] = {
+ 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x02, 0x01, 0x00, /* clang-format */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+/*
+ * Check that the image at @mem begins with the 64-bit ELF identification
+ * matching the build's byte order and, if so, pass it to
+ * handle_elf_riscv64().
+ *
+ * Returns the result of handle_elf_riscv64(), or -EINVAL when the
+ * identification bytes do not match.
+ */
+int handle_binary(void *mem, size_t size)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	const unsigned char *expected_ident = elf_ident_64_le;
+#else
+	const unsigned char *expected_ident = elf_ident_64_be;
+#endif
+
+	/* Both ident arrays are EI_NIDENT bytes, so either one gives the length. */
+	if (memcmp(mem, expected_ident, sizeof(elf_ident_64_le)) != 0) {
+		pr_err("Unsupported Elf format detected\n");
+		return -EINVAL;
+	}
+
+	return handle_elf_riscv64(mem, size);
+}
\ No newline at end of file
diff --git a/test/zdtm/static/mntns-deleted-dst b/compel/arch/riscv64/src/lib/include/cpu.h
similarity index 100%
rename from test/zdtm/static/mntns-deleted-dst
rename to compel/arch/riscv64/src/lib/include/cpu.h
diff --git a/compel/arch/riscv64/src/lib/include/handle-elf.h b/compel/arch/riscv64/src/lib/include/handle-elf.h
new file mode 100644
index 000000000..582770583
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/handle-elf.h
@@ -0,0 +1,12 @@
+#ifndef COMPEL_HANDLE_ELF_H__
+#define COMPEL_HANDLE_ELF_H__
+
+#include "elf64-types.h"
+
+#define __handle_elf handle_elf_riscv64
+#define ELF_RISCV
+#define arch_is_machine_supported(e_machine) (e_machine == EM_RISCV)
+
+extern int handle_elf_riscv64(void *mem, size_t size);
+
+#endif /* COMPEL_HANDLE_ELF_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/syscall.h b/compel/arch/riscv64/src/lib/include/syscall.h
new file mode 100644
index 000000000..53f10525d
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/syscall.h
@@ -0,0 +1,8 @@
+#ifndef __COMPEL_SYSCALL_H__
+#define __COMPEL_SYSCALL_H__
+#define __NR(syscall, compat) \
+ ({ \
+ (void)compat; \
+ __NR_##syscall; \
+ })
+#endif
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/breakpoints.h b/compel/arch/riscv64/src/lib/include/uapi/asm/breakpoints.h
new file mode 100644
index 000000000..f2ba799cb
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/uapi/asm/breakpoints.h
@@ -0,0 +1,15 @@
+#ifndef __COMPEL_BREAKPOINTS_H__
+#define __COMPEL_BREAKPOINTS_H__
+#define ARCH_SI_TRAP TRAP_BRKPT
+
+/*
+ * Stub: issues no ptrace request, ignores @pid and @addr and always
+ * returns 0.
+ * NOTE(review): presumably 0 tells the caller that no breakpoint was armed - confirm.
+ */
+static inline int ptrace_set_breakpoint(pid_t pid, void *addr)
+{
+ return 0;
+}
+
+/* Stub: nothing to flush, ignores @pid and always returns 0. */
+static inline int ptrace_flush_breakpoints(pid_t pid)
+{
+ return 0;
+}
+
+#endif
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/cpu.h b/compel/arch/riscv64/src/lib/include/uapi/asm/cpu.h
new file mode 100644
index 000000000..ac58567e3
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/uapi/asm/cpu.h
@@ -0,0 +1,7 @@
+#ifndef UAPI_COMPEL_ASM_CPU_H__
+#define UAPI_COMPEL_ASM_CPU_H__
+
+/* CPU information record for this architecture; it currently has no members. */
+typedef struct {
+} compel_cpuinfo_t;
+
+#endif /* UAPI_COMPEL_ASM_CPU_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/fpu.h b/compel/arch/riscv64/src/lib/include/uapi/asm/fpu.h
new file mode 100644
index 000000000..a74decc23
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/uapi/asm/fpu.h
@@ -0,0 +1,4 @@
+#ifndef __CR_ASM_FPU_H__
+#define __CR_ASM_FPU_H__
+
+#endif /* __CR_ASM_FPU_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/riscv64/src/lib/include/uapi/asm/infect-types.h
new file mode 100644
index 000000000..192810cac
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/uapi/asm/infect-types.h
@@ -0,0 +1,52 @@
+#ifndef UAPI_COMPEL_ASM_TYPES_H__
+#define UAPI_COMPEL_ASM_TYPES_H__
+
+#include
+#include
+#include
+#include
+
+#define SIGMAX 64
+#define SIGMAX_OLD 31
+
+/*
+ * Register-set types. Both structures are declared by the Linux kernel UAPI
+ * header arch/riscv/include/uapi/asm/ptrace.h: user_regs_struct holds a
+ * thread's general-purpose registers and __riscv_d_ext_state its
+ * double-precision floating-point registers plus fcsr.
+ */
+typedef struct user_regs_struct user_regs_struct_t;
+typedef struct __riscv_d_ext_state user_fpregs_struct_t;
+
+/*
+ * Thread-area/TLS hooks are stubbed out: the two fetchers expand to 0 and the
+ * two getters expand to nothing.
+ */
+#define __compel_arch_fetch_thread_area(tid, th) 0
+#define compel_arch_fetch_thread_area(tctl) 0
+#define compel_arch_get_tls_task(ctl, tls)
+#define compel_arch_get_tls_thread(tctl, tls)
+
+/* Register accessors: the result is read from a0, the instruction pointer from pc. */
+#define REG_RES(registers) ((uint64_t)(registers).a0)
+#define REG_IP(registers) ((uint64_t)(registers).pc)
+#define SET_REG_IP(registers, val) ((registers).pc = (val))
+
+/*
+ * REG_SP is also defined in riscv64-linux-gnu/include/sys/ucontext.h
+ * with a different meaning, and that definition is not used in CRIU.
+ * Undefine it here so that the accessor below can take the name.
+ */
+#ifdef REG_SP
+#undef REG_SP
+#endif
+
+/* Stack pointer accessor. */
+#define REG_SP(registers) ((uint64_t)((registers).sp))
+
+/* The syscall number is read from a7. */
+#define REG_SYSCALL_NR(registers) ((uint64_t)(registers).a7)
+
+/* Always true: @pregs is not inspected. */
+#define user_regs_native(pregs) true
+
+#define ARCH_SI_TRAP TRAP_BRKPT
+
+/*
+ * Map a syscall name to its number: yields __NR_<syscall>. @compat is
+ * evaluated and discarded.
+ */
+#define __NR(syscall, compat) \
+ ({ \
+ (void)compat; \
+ __NR_##syscall; \
+ })
+
+#endif /* UAPI_COMPEL_ASM_TYPES_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/instruction_formats.h b/compel/arch/riscv64/src/lib/include/uapi/asm/instruction_formats.h
new file mode 100644
index 000000000..e231d0465
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/uapi/asm/instruction_formats.h
@@ -0,0 +1,26 @@
+#ifndef COMPEL_RELOCATIONS_H__
+#define COMPEL_RELOCATIONS_H__
+
+#include
+
+/*
+ * Scatter a branch offset into the immediate fields of a B-type instruction
+ * word: offset bit 12 -> bit 31, bits 10:5 -> bits 30:25, bits 4:1 -> bits
+ * 11:8 and bit 11 -> bit 7. Offset bit 0 and bits above 12 are discarded.
+ */
+static inline uint32_t riscv_b_imm(uint32_t val)
+{
+	const uint32_t imm_12 = (val & 0x00001000) << 19;
+	const uint32_t imm_10_5 = (val & 0x000007e0) << 20;
+	const uint32_t imm_4_1 = (val & 0x0000001e) << 7;
+	const uint32_t imm_11 = (val & 0x00000800) >> 4;
+
+	return imm_12 | imm_10_5 | imm_4_1 | imm_11;
+}
+
+/*
+ * Place an immediate into the I-type immediate field (bits 31:20) of an
+ * instruction word. Bits of @val above bit 11 are shifted out.
+ */
+static inline uint32_t riscv_i_imm(uint32_t val)
+{
+	const unsigned int imm_field_lsb = 20;
+
+	return val << imm_field_lsb;
+}
+
+/*
+ * Keep the part of @val that a U-type instruction encodes: bits 31:12 stay
+ * in place and the low 12 bits are cleared.
+ */
+static inline uint32_t riscv_u_imm(uint32_t val)
+{
+	const uint32_t low12_mask = 0x00000fff;
+
+	return val & ~low12_mask;
+}
+
+/*
+ * Scatter a jump offset into the immediate fields of a J-type instruction
+ * word: offset bit 20 -> bit 31, bits 10:1 -> bits 30:21, bit 11 -> bit 20
+ * and bits 19:12 stay in place. Offset bit 0 and bits above 20 are discarded.
+ */
+static inline uint32_t riscv_j_imm(uint32_t val)
+{
+	const uint32_t imm_20 = (val & 0x00100000) << 11;
+	const uint32_t imm_10_1 = (val & 0x000007fe) << 20;
+	const uint32_t imm_11 = (val & 0x00000800) << 9;
+	const uint32_t imm_19_12 = val & 0x000ff000;
+
+	return imm_20 | imm_10_1 | imm_11 | imm_19_12;
+}
+
+#endif /* COMPEL_RELOCATIONS_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/processor-flags.h b/compel/arch/riscv64/src/lib/include/uapi/asm/processor-flags.h
new file mode 100644
index 000000000..e40fb6fce
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/uapi/asm/processor-flags.h
@@ -0,0 +1,4 @@
+#ifndef UAPI_COMPEL_ASM_PROCESSOR_FLAGS_H__
+#define UAPI_COMPEL_ASM_PROCESSOR_FLAGS_H__
+
+#endif /* UAPI_COMPEL_ASM_PROCESSOR_FLAGS_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/riscv64/src/lib/include/uapi/asm/sigframe.h
new file mode 100644
index 000000000..761a08f62
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/include/uapi/asm/sigframe.h
@@ -0,0 +1,68 @@
+#ifndef UAPI_COMPEL_ASM_SIGFRAME_H__
+#define UAPI_COMPEL_ASM_SIGFRAME_H__
+
+#include
+
+#include
+
+#include
+
+/* Copied from the kernel header arch/riscv/include/uapi/asm/sigcontext.h */
+/*
+ * Signal context structure
+ *
+ * This contains the context saved before a signal handler is invoked;
+ * it is restored by sys_sigreturn / sys_rt_sigreturn.
+ */
+// struct sigcontext {
+// struct user_regs_struct sc_regs;
+// union __riscv_fp_state sc_fpregs;
+// /*
+// * 4K + 128 reserved for vector state and future expansion.
+// * This space is enough to store the vector context whose VLENB
+// * is less or equal to 128.
+// * (The size of the vector context is 4144 byte as VLENB is 128)
+// */
+// __u8 __reserved[4224] __attribute__((__aligned__(16)));
+// };
+
+#define rt_sigcontext sigcontext
+
+#include
+
+/*
+ * Signal frame layout.
+ * Copied from the kernel source arch/riscv/kernel/signal.c
+ */
+struct rt_sigframe {
+ siginfo_t info; /* signal information */
+ ucontext_t uc; /* user context: signal mask (uc_sigmask), alternate stack (uc_stack), registers (uc_mcontext) */
+};
+
+/*
+ * Emit inline assembly that issues the rt_sigreturn system call:
+ *  - sp is loaded from @new_sp,
+ *  - a7 is loaded with __NR_rt_sigreturn,
+ *  - ecall enters the kernel.
+ * This is how execution returns from a signal frame to the context stored
+ * in it. The @rt_sigframe argument is not referenced by the expansion.
+ */
+/* clang-format off */
+#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe) \
+ asm volatile( \
+ "mv sp, %0\n" \
+ "li a7, "__stringify(__NR_rt_sigreturn)" \n" \
+ "ecall\n" \
+ : \
+ : "r"(new_sp) \
+ : "a7", "memory")
+/* clang-format on */
+
+/*
+ * Accessors for struct rt_sigframe. The pointer argument is parenthesized
+ * wherever it is used so that passing an expression (e.g. "frames + 1")
+ * expands as intended.
+ */
+/* Address of the frame's ucontext_t. */
+#define RT_SIGFRAME_UC(rt_sigframe) (&(rt_sigframe)->uc)
+/* Program counter stored in the frame's general-register slot REG_PC. */
+#define RT_SIGFRAME_REGIP(rt_sigframe) ((long unsigned int)(rt_sigframe)->uc.uc_mcontext.__gregs[REG_PC])
+/* Constant 1; the argument is not evaluated. */
+#define RT_SIGFRAME_HAS_FPU(rt_sigframe) 1
+/* Constant 0; the argument is not evaluated. */
+#define RT_SIGFRAME_OFFSET(rt_sigframe) 0
+
+// #define RT_SIGFRAME_SIGCONTEXT(rt_sigframe) ((struct cr_sigcontext *)&(rt_sigframe)->uc.uc_mcontext)
+// #define RT_SIGFRAME_AUX_CONTEXT(rt_sigframe) ((struct sigcontext *)&(RT_SIGFRAME_SIGCONTEXT(rt_sigframe)->__reserved))
+// #define RT_SIGFRAME_FPU(rt_sigframe) (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->fpsimd)
+
+/*
+ * Signal-mask helpers. Both operate on sizeof(k_rtsigset_t) bytes of the
+ * frame's uc.uc_sigmask. Macro arguments are parenthesized so that
+ * expressions can be passed safely.
+ */
+/* Zero the signal mask stored in @sigframe. */
+#define rt_sigframe_erase_sigset(sigframe) \
+	memset(&(sigframe)->uc.uc_sigmask, 0, sizeof(k_rtsigset_t))
+/* Copy the signal mask pointed to by @from into @sigframe. */
+#define rt_sigframe_copy_sigset(sigframe, from) \
+	memcpy(&(sigframe)->uc.uc_sigmask, (from), sizeof(k_rtsigset_t))
+
+#endif /* UAPI_COMPEL_ASM_SIGFRAME_H__ */
\ No newline at end of file
diff --git a/compel/arch/riscv64/src/lib/infect.c b/compel/arch/riscv64/src/lib/infect.c
new file mode 100644
index 000000000..3f3a4b7ec
--- /dev/null
+++ b/compel/arch/riscv64/src/lib/infect.c
@@ -0,0 +1,224 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include "common/page.h"
+#include "uapi/compel/asm/infect-types.h"
+#include "log.h"
+#include "errno.h"
+#include "infect.h"
+#include "infect-priv.h"
+
+/* NOTE(review): presumably runtime page-size values consumed via common/page.h - confirm. */
+unsigned __page_size = 0;
+unsigned __page_shift = 0;
+
+/*
+ * Injected syscall instruction
+ *
+ * Two 32-bit RISC-V instructions stored in little-endian byte order:
+ * ecall (0x00000073) followed by ebreak (0x00100073).
+ */
+const char code_syscall[] = {
+ 0x73, 0x00, 0x00, 0x00, /* ecall */
+ 0x73, 0x00, 0x10, 0x00 /* ebreak */
+};
+
+/* Size of code_syscall rounded up to a multiple of sizeof(long). */
+static const int code_syscall_aligned = round_up(sizeof(code_syscall), sizeof(long));
+
+/*
+ * Holder for build-time checks (marked __always_unused): the aligned blob
+ * size must equal BUILTIN_SYSCALL_SIZE and the raw blob size must be a
+ * power of two.
+ */
+static inline void __always_unused __check_code_syscall(void)
+{
+ BUILD_BUG_ON(code_syscall_aligned != BUILTIN_SYSCALL_SIZE);
+ BUILD_BUG_ON(!is_log2(sizeof(code_syscall)));
+}
+
+/*
+ * Fill the machine context of @sigframe from ptrace-style register sets.
+ *
+ * General-purpose registers from @regs are stored into __gregs[0..31] in the
+ * order pc, ra, sp, gp, tp, t0-t2, s0-s1, a0-a7, s2-s11, t3-t6; the
+ * floating-point registers and fcsr from @fpregs are stored into the
+ * double-precision part of __fpregs.
+ *
+ * Always returns 0.
+ */
+int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
+{
+	mcontext_t *mc = &sigframe->uc.uc_mcontext;
+
+	/* pc and the dedicated pointer registers */
+	mc->__gregs[0] = regs->pc;
+	mc->__gregs[1] = regs->ra;
+	mc->__gregs[2] = regs->sp;
+	mc->__gregs[3] = regs->gp;
+	mc->__gregs[4] = regs->tp;
+
+	/* t0-t2 */
+	mc->__gregs[5] = regs->t0;
+	mc->__gregs[6] = regs->t1;
+	mc->__gregs[7] = regs->t2;
+
+	/* s0-s1 */
+	mc->__gregs[8] = regs->s0;
+	mc->__gregs[9] = regs->s1;
+
+	/* a0-a7 */
+	mc->__gregs[10] = regs->a0;
+	mc->__gregs[11] = regs->a1;
+	mc->__gregs[12] = regs->a2;
+	mc->__gregs[13] = regs->a3;
+	mc->__gregs[14] = regs->a4;
+	mc->__gregs[15] = regs->a5;
+	mc->__gregs[16] = regs->a6;
+	mc->__gregs[17] = regs->a7;
+
+	/* s2-s11 */
+	mc->__gregs[18] = regs->s2;
+	mc->__gregs[19] = regs->s3;
+	mc->__gregs[20] = regs->s4;
+	mc->__gregs[21] = regs->s5;
+	mc->__gregs[22] = regs->s6;
+	mc->__gregs[23] = regs->s7;
+	mc->__gregs[24] = regs->s8;
+	mc->__gregs[25] = regs->s9;
+	mc->__gregs[26] = regs->s10;
+	mc->__gregs[27] = regs->s11;
+
+	/* t3-t6 */
+	mc->__gregs[28] = regs->t3;
+	mc->__gregs[29] = regs->t4;
+	mc->__gregs[30] = regs->t5;
+	mc->__gregs[31] = regs->t6;
+
+	/* floating-point state */
+	mc->__fpregs.__d.__fcsr = fpregs->fcsr;
+	memcpy(mc->__fpregs.__d.__f, fpregs->f, sizeof(fpregs->f));
+
+	return 0;
+}
+
+/* No-op on this architecture: neither frame is touched and 0 is always returned. */
+int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe)
+{
+ return 0;
+}
+
+/*
+ * Read the floating-point register set of task @pid with PTRACE_GETREGSET
+ * (NT_PRFPREG) and pass it, together with @regs, to the @save callback.
+ *
+ * @regs:     general-purpose registers; forwarded to @save unchanged
+ * @ext_regs: destination for the FP registers; when NULL a local buffer is used
+ * @save:     callback invoked as save(pid, arg, regs, fp registers)
+ * @arg:      opaque argument forwarded to @save
+ * @flags:    unused
+ *
+ * Returns -1 when the ptrace request fails, otherwise the value returned
+ * by @save.
+ */
+int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
+			 void *arg, __maybe_unused unsigned long flags)
+{
+	user_fpregs_struct_t scratch;
+	user_fpregs_struct_t *fp_state = ext_regs;
+	struct iovec regset;
+
+	/* Callers that do not want the FP state back still need a buffer to read into. */
+	if (!fp_state)
+		fp_state = &scratch;
+
+	pr_info("Dumping FPU registers for %d\n", pid);
+
+	regset.iov_base = fp_state;
+	regset.iov_len = sizeof(*fp_state);
+	if (ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &regset) != 0) {
+		pr_perror("Failed to obtain FPU registers for %d", pid);
+		return -1;
+	}
+
+	return save(pid, arg, regs, fp_state);
+}
+
+/*
+ * Write the register set in @ext_regs to task @pid with PTRACE_SETREGSET
+ * (NT_PRFPREG).
+ *
+ * Returns 0 on success and -1 when the ptrace request fails.
+ */
+int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
+{
+	struct iovec regset = {
+		.iov_base = ext_regs,
+		.iov_len = sizeof(*ext_regs),
+	};
+
+	pr_info("Restoring GP/FPU registers for %d\n", pid);
+
+	if (ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &regset) == 0)
+		return 0;
+
+	pr_perror("Failed to set FPU registers for %d", pid);
+	return -1;
+}
+
+/*
+ * Execute one system call in the task controlled by @ctl.
+ *
+ * A copy of the task's saved registers (ctl->orig.regs) is loaded with the
+ * syscall number in a7 and the six arguments in a0-a5, then handed together
+ * with the ecall/ebreak blob to compel_execute_syscall().
+ *
+ * @ctl:         parasite control block of the target task
+ * @nr:          system call number
+ * @ret:         receives the value found in a0 afterwards; it is written
+ *               even when compel_execute_syscall() reports an error
+ * @arg1..@arg6: system call arguments
+ *
+ * Returns the value returned by compel_execute_syscall().
+ */
+int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, unsigned long arg1, unsigned long arg2,
+		   unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6)
+{
+	user_regs_struct_t regs = ctl->orig.regs;
+	int err;
+
+	regs.a7 = (unsigned long)nr;
+	regs.a0 = arg1;
+	regs.a1 = arg2;
+	regs.a2 = arg3;
+	regs.a3 = arg4;
+	regs.a4 = arg5;
+	regs.a5 = arg6;
+	/* a6 carries no argument here and is cleared. */
+	regs.a6 = 0;
+
+	/* Pass the address of the local register copy so the result can be read back. */
+	err = compel_execute_syscall(ctl, &regs, code_syscall);
+
+	*ret = regs.a0;
+	return err;
+}
+
+/*
+ * Run mmap() inside the target task through compel_syscall().
+ * Used during the infection process to allocate memory for the parasite code.
+ *
+ * The arguments mirror those of mmap(). Returns the address of the new
+ * mapping, or a null pointer when the syscall could not be executed or
+ * reported a negative result.
+ */
+void *remote_mmap(struct parasite_ctl *ctl, void *addr, size_t length, int prot, int flags, int fd, off_t offset)
+{
+	long result;
+
+	if (compel_syscall(ctl, __NR_mmap, &result, (unsigned long)addr, length, prot, flags, fd, offset) < 0 ||
+	    result < 0)
+		result = 0;
+
+	return (void *)result;
+}
+
+/*
+ * Point @regs at a new execution location: pc is set to @new_ip and, when
+ * @stack is non-NULL, sp is set to @stack. A NULL @stack leaves sp untouched.
+ */
+void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs)
+{
+	if (stack != 0)
+		regs->sp = (unsigned long)stack;
+
+	regs->pc = new_ip;
+}
+
+/*
+ * Report whether the task behind @ctl can be dumped. No check is implemented
+ * yet: @ctl is not inspected and the answer is always true.
+ */
+bool arch_can_dump_task(struct parasite_ctl *ctl)
+{
+ /*
+ * TODO: Add proper check here.
+ */
+ return true;
+}
+
+/*
+ * Fetch the target task's alternate signal stack settings.
+ *
+ * The alternate signal stack is a separate memory area for signal handlers
+ * to run on, which avoids problems with the main process stack. The query is
+ * made by running sigaltstack() in the task through compel_syscall(), with
+ * 0 as the new-stack argument and the address of s->uc.uc_stack as the
+ * old-stack argument.
+ *
+ * Returns the compel_syscall() error if it is non-zero, otherwise the
+ * syscall's own return value.
+ */
+int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s)
+{
+	unsigned long old_stack = (unsigned long)&s->uc.uc_stack;
+	long sys_ret;
+	int err;
+
+	err = compel_syscall(ctl, __NR_sigaltstack, &sys_ret, 0, old_stack, 0, 0, 0, 0);
+	if (err)
+		return err;
+
+	return sys_ret;
+}
+
+/*
+ * Task size is the maximum size of the virtual address space a process can
+ * occupy. According to the Linux kernel header
+ * arch/riscv/include/asm/pgtable.h it is:
+ * - 0x9fc00000        (~2.5GB) for RV32
+ * - 0x4000000000      ( 256GB) for RV64 using the SV39 mmu
+ * - 0x800000000000    ( 128TB) for RV64 using the SV48 mmu
+ * - 0x100000000000000 (  64PB) for RV64 using the SV57 mmu
+ */
+#define TASK_SIZE_MIN (1UL << 38)
+#define TASK_SIZE_MAX (1UL << 56)
+
+/*
+ * Probe the task size at run time: starting at TASK_SIZE_MIN, try to munmap()
+ * one page at each successive power of two and stop at the first address for
+ * which munmap() fails, or at TASK_SIZE_MAX.
+ *
+ * Returns the address at which probing stopped.
+ */
+unsigned long compel_task_size(void)
+{
+	unsigned long candidate = TASK_SIZE_MIN;
+
+	while (candidate < TASK_SIZE_MAX) {
+		if (munmap((void *)candidate, page_size()) != 0)
+			break;
+		candidate <<= 1;
+	}
+
+	return candidate;
+}
+
+/*
+ * Get task registers (overwrites weak function)
+ *
+ * Reads the general-purpose register set of task @pid into @regs with
+ * PTRACE_GETREGSET (NT_PRSTATUS) and returns the result of that ptrace call.
+ */
+int ptrace_get_regs(int pid, user_regs_struct_t *regs)
+{
+	struct iovec gp_regset = {
+		.iov_base = regs,
+		.iov_len = sizeof(*regs),
+	};
+
+	return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &gp_regset);
+}
+
+/*
+ * Set task registers (overwrites weak function)
+ *
+ * Writes @regs to task @pid as its general-purpose register set with
+ * PTRACE_SETREGSET (NT_PRSTATUS) and returns the result of that ptrace call.
+ */
+int ptrace_set_regs(int pid, user_regs_struct_t *regs)
+{
+	struct iovec gp_regset = {
+		.iov_base = regs,
+		.iov_len = sizeof(*regs),
+	};
+
+	return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &gp_regset);
+}
diff --git a/compel/arch/s390/plugins/include/asm/syscall-types.h b/compel/arch/s390/plugins/include/asm/syscall-types.h
index 55d7ddb61..dd635399d 100644
--- a/compel/arch/s390/plugins/include/asm/syscall-types.h
+++ b/compel/arch/s390/plugins/include/asm/syscall-types.h
@@ -1,7 +1,7 @@
#ifndef COMPEL_ARCH_SYSCALL_TYPES_H__
#define COMPEL_ARCH_SYSCALL_TYPES_H__
-#define SA_RESTORER 0x04000000U
+#define SA_RESTORER 0x04000000U
typedef void rt_signalfn_t(int, siginfo_t *, void *);
typedef rt_signalfn_t *rt_sighandler_t;
@@ -9,13 +9,13 @@ typedef rt_signalfn_t *rt_sighandler_t;
typedef void rt_restorefn_t(void);
typedef rt_restorefn_t *rt_sigrestore_t;
-#define _KNSIG 64
-#define _NSIG_BPW 64
+#define _KNSIG 64
+#define _NSIG_BPW 64
-#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW)
+#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW)
typedef struct {
- unsigned long sig[_KNSIG_WORDS];
+ unsigned long sig[_KNSIG_WORDS];
} k_rtsigset_t;
/*
@@ -23,10 +23,10 @@ typedef struct {
* include/linux/signal.h.
*/
typedef struct {
- rt_sighandler_t rt_sa_handler;
- unsigned long rt_sa_flags;
- rt_sigrestore_t rt_sa_restorer;
- k_rtsigset_t rt_sa_mask;
+ rt_sighandler_t rt_sa_handler;
+ unsigned long rt_sa_flags;
+ rt_sigrestore_t rt_sa_restorer;
+ k_rtsigset_t rt_sa_mask;
} rt_sigaction_t;
struct mmap_arg_struct;
diff --git a/compel/arch/s390/plugins/std/parasite-head.S b/compel/arch/s390/plugins/std/parasite-head.S
index f4cb37276..1e276a2f5 100644
--- a/compel/arch/s390/plugins/std/parasite-head.S
+++ b/compel/arch/s390/plugins/std/parasite-head.S
@@ -2,25 +2,7 @@
.section .head.text, "ax"
-/*
- * Entry point for parasite_service()
- *
- * Addresses of symbols are exported in auto-generated criu/pie/parasite-blob.h
- *
- * Function is called via parasite_run(). The command for parasite_service()
- * is stored in global variable __export_parasite_cmd.
- *
- * Load parameters for parasite_service(unsigned int cmd, void *args):
- *
- * - Parameter 1 (cmd) : %r2 = *(uint32 *)(__export_parasite_cmd + pc)
- * - Parameter 2 (args): %r3 = __export_parasite_args + pc
- */
ENTRY(__export_parasite_head_start)
- larl %r14,__export_parasite_cmd
- llgf %r2,0(%r14)
- larl %r3,__export_parasite_args
brasl %r14,parasite_service
.long 0x00010001 /* S390_BREAKPOINT_U16: Generates SIGTRAP */
-__export_parasite_cmd:
- .long 0
END(__export_parasite_head_start)
diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
index cc13a63dd..ff2f33006 100644
--- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
+++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
@@ -82,7 +82,7 @@ __NR_sys_timer_settime 255 sys_timer_settime (kernel_timer_t timer_id, int flag
__NR_sys_timer_gettime 256 sys_timer_gettime (int timer_id, const struct itimerspec *setting)
__NR_sys_timer_getoverrun 257 sys_timer_getoverrun (int timer_id)
__NR_sys_timer_delete 258 sys_timer_delete (kernel_timer_t timer_id)
-__NR_clock_gettime 260 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp)
+__NR_clock_gettime 260 sys_clock_gettime (clockid_t which_clock, struct timespec *tp)
__NR_exit_group 248 sys_exit_group (int error_code)
__NR_waitid 281 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
__NR_set_robust_list 304 sys_set_robust_list (struct robust_list_head *head, size_t len)
@@ -108,3 +108,15 @@ __NR_userfaultfd 355 sys_userfaultfd (int flags)
__NR_preadv 328 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h)
__NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz)
__NR_ppoll 302 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize)
+__NR_open_tree 428 sys_open_tree (int dirfd, const char *pathname, unsigned int flags)
+__NR_move_mount 429 sys_move_mount (int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, int flags)
+__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags)
+__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux)
+__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags)
+__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
+__NR_close_range 436 sys_close_range (unsigned int fd, unsigned int max_fd, unsigned int flags)
+__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
+__NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size)
+__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
+__NR_rseq 383 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+__NR_membarrier 356 sys_membarrier (int cmd, unsigned int flags, int cpu_id)
diff --git a/compel/arch/s390/plugins/std/syscalls/syscalls-s390.c b/compel/arch/s390/plugins/std/syscalls/syscalls-s390.c
index 2b35cca4a..11c3284ab 100644
--- a/compel/arch/s390/plugins/std/syscalls/syscalls-s390.c
+++ b/compel/arch/s390/plugins/std/syscalls/syscalls-s390.c
@@ -3,14 +3,13 @@
/*
* Define prototype because of compile error if we include uapi/std/syscall.h
*/
-long sys_old_mmap (struct mmap_arg_struct *);
+long sys_old_mmap(struct mmap_arg_struct *);
/*
* On s390 we have defined __ARCH_WANT_SYS_OLD_MMAP - Therefore implement
* system call with one parameter "mmap_arg_struct".
*/
-unsigned long sys_mmap(void *addr, unsigned long len, unsigned long prot,
- unsigned long flags, unsigned long fd,
+unsigned long sys_mmap(void *addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd,
unsigned long offset)
{
struct mmap_arg_struct arg_struct;
diff --git a/compel/arch/s390/scripts/compel-pack.lds.S b/compel/arch/s390/scripts/compel-pack.lds.S
index 91ffbda3e..a82118983 100644
--- a/compel/arch/s390/scripts/compel-pack.lds.S
+++ b/compel/arch/s390/scripts/compel-pack.lds.S
@@ -12,7 +12,7 @@ SECTIONS
*(.compel.init)
}
- .data : {
+ .data : ALIGN(0x1000) {
*(.data*)
*(.bss*)
}
@@ -33,8 +33,4 @@ SECTIONS
*(.group*)
*(.eh_frame*)
}
-
-/* Parasite args should have 4 bytes align, as we have futex inside. */
-. = ALIGN(4);
-__export_parasite_args = .;
}
diff --git a/compel/arch/s390/src/lib/cpu.c b/compel/arch/s390/src/lib/cpu.c
index 5d86bf239..c98607e16 100644
--- a/compel/arch/s390/src/lib/cpu.c
+++ b/compel/arch/s390/src/lib/cpu.c
@@ -9,7 +9,7 @@
#include "log.h"
-#undef LOG_PREFIX
+#undef LOG_PREFIX
#define LOG_PREFIX "cpu: "
static compel_cpuinfo_t rt_info;
@@ -23,10 +23,20 @@ static void fetch_rt_cpuinfo(void)
}
}
-void compel_set_cpu_cap(compel_cpuinfo_t *c, unsigned int feature) { }
-void compel_clear_cpu_cap(compel_cpuinfo_t *c, unsigned int feature) { }
-int compel_test_fpu_cap(compel_cpuinfo_t *info, unsigned int feature) { return 0; }
-int compel_test_cpu_cap(compel_cpuinfo_t *c, unsigned int feature) { return 0; }
+void compel_set_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
+{
+}
+void compel_clear_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
+{
+}
+int compel_test_fpu_cap(compel_cpuinfo_t *info, unsigned int feature)
+{
+ return 0;
+}
+int compel_test_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
+{
+ return 0;
+}
int compel_cpuid(compel_cpuinfo_t *info)
{
diff --git a/compel/arch/s390/src/lib/handle-elf.c b/compel/arch/s390/src/lib/handle-elf.c
index 01a8bf4c8..8e766dc1b 100644
--- a/compel/arch/s390/src/lib/handle-elf.c
+++ b/compel/arch/s390/src/lib/handle-elf.c
@@ -1,14 +1,12 @@
#include
-
-#include "uapi/compel.h"
+#include
#include "handle-elf.h"
#include "piegen.h"
#include "log.h"
-static const unsigned char __maybe_unused
-elf_ident_64[EI_NIDENT] = {
- 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x02, 0x01, 0x00,
+static const unsigned char __maybe_unused elf_ident_64[EI_NIDENT] = {
+ 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x02, 0x01, 0x00, /* clang-format */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
diff --git a/compel/arch/s390/src/lib/include/handle-elf.h b/compel/arch/s390/src/lib/include/handle-elf.h
index cd1357401..597d8059f 100644
--- a/compel/arch/s390/src/lib/include/handle-elf.h
+++ b/compel/arch/s390/src/lib/include/handle-elf.h
@@ -5,8 +5,8 @@
#define ELF_S390
-#define __handle_elf handle_elf_s390
-#define arch_is_machine_supported(e_machine) (e_machine == EM_S390)
+#define __handle_elf handle_elf_s390
+#define arch_is_machine_supported(e_machine) (e_machine == EM_S390)
int handle_elf_s390(void *mem, size_t size);
diff --git a/compel/arch/s390/src/lib/include/syscall.h b/compel/arch/s390/src/lib/include/syscall.h
index 57d49121f..828f29e4b 100644
--- a/compel/arch/s390/src/lib/include/syscall.h
+++ b/compel/arch/s390/src/lib/include/syscall.h
@@ -1,8 +1,7 @@
#ifndef __COMPEL_SYSCALL_H__
#define __COMPEL_SYSCALL_H__
-unsigned long sys_mmap(void *addr, unsigned long len, unsigned long prot,
- unsigned long flags, unsigned long fd,
+unsigned long sys_mmap(void *addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd,
unsigned long offset);
#endif
diff --git a/compel/arch/s390/src/lib/include/uapi/asm/cpu.h b/compel/arch/s390/src/lib/include/uapi/asm/cpu.h
index b01db511d..c2652b2f4 100644
--- a/compel/arch/s390/src/lib/include/uapi/asm/cpu.h
+++ b/compel/arch/s390/src/lib/include/uapi/asm/cpu.h
@@ -4,7 +4,7 @@
#include
typedef struct {
- uint64_t hwcap[2];
+ uint64_t hwcap[2];
} compel_cpuinfo_t;
#endif /* __CR_ASM_CPU_H__ */
diff --git a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h
index fddf65d3b..87283bc6b 100644
--- a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h
+++ b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h
@@ -7,8 +7,8 @@
#include
#include "common/page.h"
-#define SIGMAX 64
-#define SIGMAX_OLD 31
+#define SIGMAX 64
+#define SIGMAX_OLD 31
/*
* Definitions from /usr/include/asm/ptrace.h:
@@ -33,28 +33,28 @@ typedef struct {
} vector128_t;
struct prfpreg {
- uint32_t fpc;
- uint64_t fprs[16];
+ uint32_t fpc;
+ uint64_t fprs[16];
};
-#define USER_FPREGS_VXRS 0x000000001
+#define USER_FPREGS_VXRS 0x000000001
/* Guarded-storage control block */
-#define USER_GS_CB 0x000000002
+#define USER_GS_CB 0x000000002
/* Guarded-storage broadcast control block */
-#define USER_GS_BC 0x000000004
+#define USER_GS_BC 0x000000004
/* Runtime-instrumentation control block */
-#define USER_RI_CB 0x000000008
+#define USER_RI_CB 0x000000008
/* Runtime-instrumentation bit set */
-#define USER_RI_ON 0x000000010
+#define USER_RI_ON 0x000000010
typedef struct {
- uint32_t flags;
- struct prfpreg prfpreg;
- uint64_t vxrs_low[16];
- vector128_t vxrs_high[16];
- uint64_t gs_cb[4];
- uint64_t gs_bc[4];
- uint64_t ri_cb[8];
+ uint32_t flags;
+ struct prfpreg prfpreg;
+ uint64_t vxrs_low[16];
+ vector128_t vxrs_high[16];
+ uint64_t gs_cb[4];
+ uint64_t gs_bc[4];
+ uint64_t ri_cb[8];
} user_fpregs_struct_t;
typedef struct {
@@ -62,18 +62,23 @@ typedef struct {
uint32_t system_call;
} user_regs_struct_t;
-#define REG_RES(r) ((uint64_t)(r).prstatus.gprs[2])
-#define REG_IP(r) ((uint64_t)(r).prstatus.psw.addr)
-#define REG_SP(r) ((uint64_t)(r).prstatus.gprs[15])
+#define REG_RES(r) ((uint64_t)(r).prstatus.gprs[2])
+#define REG_IP(r) ((uint64_t)(r).prstatus.psw.addr)
+#define SET_REG_IP(r, val) ((r).prstatus.psw.addr = (val))
+#define REG_SP(r) ((uint64_t)(r).prstatus.gprs[15])
/*
* We assume that REG_SYSCALL_NR() is only used for pie code where we
* always use svc 0 with opcode in %r1.
*/
-#define REG_SYSCALL_NR(r) ((uint64_t)(r).prstatus.gprs[1])
+#define REG_SYSCALL_NR(r) ((uint64_t)(r).prstatus.gprs[1])
-#define user_regs_native(pregs) true
+#define user_regs_native(pregs) true
-#define __NR(syscall, compat) __NR_##syscall
+#define __NR(syscall, compat) \
+ ({ \
+ (void)compat; \
+ __NR_##syscall; \
+ })
struct mmap_arg_struct {
unsigned long addr;
@@ -84,4 +89,9 @@ struct mmap_arg_struct {
unsigned long offset;
};
+#define __compel_arch_fetch_thread_area(tid, th) 0
+#define compel_arch_fetch_thread_area(tctl) 0
+#define compel_arch_get_tls_task(ctl, tls)
+#define compel_arch_get_tls_thread(tctl, tls)
+
#endif /* UAPI_COMPEL_ASM_TYPES_H__ */
diff --git a/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h b/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h
index b6b894473..965fef102 100644
--- a/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h
+++ b/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h
@@ -36,14 +36,14 @@ typedef struct {
* From /usr/include/uapi/asm/ucontext.h
*/
struct ucontext_extended {
- unsigned long uc_flags;
- ucontext_t *uc_link;
- stack_t uc_stack;
- _sigregs uc_mcontext;
- sigset_t uc_sigmask;
+ unsigned long uc_flags;
+ ucontext_t *uc_link;
+ stack_t uc_stack;
+ _sigregs uc_mcontext;
+ sigset_t uc_sigmask;
/* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
- unsigned char __unused[128 - sizeof(sigset_t)];
- _sigregs_ext uc_mcontext_ext;
+ unsigned char __unused[128 - sizeof(sigset_t)];
+ _sigregs_ext uc_mcontext_ext;
};
/*
@@ -59,6 +59,7 @@ struct rt_sigframe {
/*
* Do rt_sigreturn SVC
*/
+/* clang-format off */
#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe) \
asm volatile( \
"lgr %%r15,%0\n" \
@@ -66,15 +67,14 @@ struct rt_sigframe {
"svc 0\n" \
: \
: "d" (new_sp) \
- : "15", "memory")
+ : "memory")
+/* clang-format on */
-#define RT_SIGFRAME_UC(rt_sigframe) (&rt_sigframe->uc)
-#define RT_SIGFRAME_REGIP(rt_sigframe) (rt_sigframe)->uc.uc_mcontext.regs.psw.addr
-#define RT_SIGFRAME_HAS_FPU(rt_sigframe) (1)
+#define RT_SIGFRAME_UC(rt_sigframe) (&rt_sigframe->uc)
+#define RT_SIGFRAME_REGIP(rt_sigframe) (rt_sigframe)->uc.uc_mcontext.regs.psw.addr
+#define RT_SIGFRAME_HAS_FPU(rt_sigframe) (1)
-#define rt_sigframe_erase_sigset(sigframe) \
- memset(&sigframe->uc.uc_sigmask, 0, sizeof(k_rtsigset_t))
-#define rt_sigframe_copy_sigset(sigframe, from) \
- memcpy(&sigframe->uc.uc_sigmask, from, sizeof(k_rtsigset_t))
+#define rt_sigframe_erase_sigset(sigframe) memset(&sigframe->uc.uc_sigmask, 0, sizeof(k_rtsigset_t))
+#define rt_sigframe_copy_sigset(sigframe, from) memcpy(&sigframe->uc.uc_sigmask, from, sizeof(k_rtsigset_t))
#endif /* UAPI_COMPEL_ASM_SIGFRAME_H__ */
diff --git a/compel/arch/s390/src/lib/infect.c b/compel/arch/s390/src/lib/infect.c
index 00e9c36d2..a77b38917 100644
--- a/compel/arch/s390/src/lib/infect.c
+++ b/compel/arch/s390/src/lib/infect.c
@@ -5,6 +5,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -16,25 +17,23 @@
#include "ptrace.h"
#include "infect-priv.h"
-#define NT_PRFPREG 2
-#define NT_S390_VXRS_LOW 0x309
-#define NT_S390_VXRS_HIGH 0x30a
-#define NT_S390_GS_CB 0x30b
-#define NT_S390_GS_BC 0x30c
-#define NT_S390_RI_CB 0x30d
+#define NT_PRFPREG 2
+#define NT_S390_VXRS_LOW 0x309
+#define NT_S390_VXRS_HIGH 0x30a
+#define NT_S390_GS_CB 0x30b
+#define NT_S390_GS_BC 0x30c
+#define NT_S390_RI_CB 0x30d
/*
* Print general purpose and access registers
*/
-static void print_user_regs_struct(const char *msg, int pid,
- user_regs_struct_t *regs)
+static void print_user_regs_struct(const char *msg, int pid, user_regs_struct_t *regs)
{
int i;
pr_debug("%s: Registers for pid=%d\n", msg, pid);
- pr_debug("system_call %08lx\n", (unsigned long) regs->system_call);
- pr_debug(" psw %016lx %016lx\n", regs->prstatus.psw.mask,
- regs->prstatus.psw.addr);
+ pr_debug("system_call %08lx\n", (unsigned long)regs->system_call);
+ pr_debug(" psw %016lx %016lx\n", regs->prstatus.psw.mask, regs->prstatus.psw.addr);
pr_debug(" orig_gpr2 %016lx\n", regs->prstatus.orig_gpr2);
for (i = 0; i < 16; i++)
pr_debug(" g%02d %016lx\n", i, regs->prstatus.gprs[i]);
@@ -56,9 +55,7 @@ static void print_vxrs(user_fpregs_struct_t *fpregs)
for (i = 0; i < 16; i++)
pr_debug(" vx_low%02d %016lx\n", i, fpregs->vxrs_low[i]);
for (i = 0; i < 16; i++)
- pr_debug(" vx_high%02d %016lx %016lx\n", i,
- fpregs->vxrs_high[i].part1,
- fpregs->vxrs_high[i].part2);
+ pr_debug(" vx_high%02d %016lx %016lx\n", i, fpregs->vxrs_high[i].part1, fpregs->vxrs_high[i].part2);
}
/*
@@ -110,8 +107,7 @@ static void print_ri_cb(user_fpregs_struct_t *fpregs)
* Print FP registers, VX registers, guarded-storage, and
* runtime-instrumentation
*/
-static void print_user_fpregs_struct(const char *msg, int pid,
- user_fpregs_struct_t *fpregs)
+static void print_user_fpregs_struct(const char *msg, int pid, user_fpregs_struct_t *fpregs)
{
int i;
@@ -125,28 +121,19 @@ static void print_user_fpregs_struct(const char *msg, int pid,
print_ri_cb(fpregs);
}
-int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe,
- user_regs_struct_t *regs,
- user_fpregs_struct_t *fpregs)
+int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
{
_sigregs_ext *dst_ext = &sigframe->uc.uc_mcontext_ext;
_sigregs *dst = &sigframe->uc.uc_mcontext;
- memcpy(dst->regs.gprs, regs->prstatus.gprs,
- sizeof(regs->prstatus.gprs));
- memcpy(dst->regs.acrs, regs->prstatus.acrs,
- sizeof(regs->prstatus.acrs));
- memcpy(&dst->regs.psw, ®s->prstatus.psw,
- sizeof(regs->prstatus.psw));
- memcpy(&dst->fpregs.fpc, &fpregs->prfpreg.fpc,
- sizeof(fpregs->prfpreg.fpc));
- memcpy(&dst->fpregs.fprs, &fpregs->prfpreg.fprs,
- sizeof(fpregs->prfpreg.fprs));
+ memcpy(dst->regs.gprs, regs->prstatus.gprs, sizeof(regs->prstatus.gprs));
+ memcpy(dst->regs.acrs, regs->prstatus.acrs, sizeof(regs->prstatus.acrs));
+ memcpy(&dst->regs.psw, ®s->prstatus.psw, sizeof(regs->prstatus.psw));
+ memcpy(&dst->fpregs.fpc, &fpregs->prfpreg.fpc, sizeof(fpregs->prfpreg.fpc));
+ memcpy(&dst->fpregs.fprs, &fpregs->prfpreg.fprs, sizeof(fpregs->prfpreg.fprs));
if (fpregs->flags & USER_FPREGS_VXRS) {
- memcpy(&dst_ext->vxrs_low, &fpregs->vxrs_low,
- sizeof(fpregs->vxrs_low));
- memcpy(&dst_ext->vxrs_high, &fpregs->vxrs_high,
- sizeof(fpregs->vxrs_high));
+ memcpy(&dst_ext->vxrs_low, &fpregs->vxrs_low, sizeof(fpregs->vxrs_low));
+ memcpy(&dst_ext->vxrs_high, &fpregs->vxrs_high, sizeof(fpregs->vxrs_high));
} else {
memset(&dst_ext->vxrs_low, 0, sizeof(dst_ext->vxrs_low));
memset(&dst_ext->vxrs_high, 0, sizeof(dst_ext->vxrs_high));
@@ -154,8 +141,7 @@ int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe,
return 0;
}
-int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe,
- struct rt_sigframe *rsigframe)
+int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe)
{
return 0;
}
@@ -168,9 +154,7 @@ static inline void rewind_psw(psw_t *psw, unsigned long bytes)
unsigned long mask;
pr_debug("Rewind psw: %016lx bytes=%lu\n", psw->addr, bytes);
- mask = (psw->mask & PSW_MASK_EA) ? -1UL :
- (psw->mask & PSW_MASK_BA) ? (1UL << 31) - 1 :
- (1UL << 24) - 1;
+ mask = (psw->mask & PSW_MASK_EA) ? -1UL : (psw->mask & PSW_MASK_BA) ? (1UL << 31) - 1 : (1UL << 24) - 1;
psw->addr = (psw->addr - bytes) & mask;
}
@@ -195,13 +179,13 @@ int get_vx_regs(pid_t pid, user_fpregs_struct_t *fpregs)
pr_debug("VXRS registers not supported\n");
return 0;
}
- pr_perror("Couldn't get VXRS_LOW\n");
+ pr_perror("Couldn't get VXRS_LOW");
return -1;
}
iov.iov_base = &fpregs->vxrs_high;
iov.iov_len = sizeof(fpregs->vxrs_high);
if (ptrace(PTRACE_GETREGSET, pid, NT_S390_VXRS_HIGH, &iov) < 0) {
- pr_perror("Couldn't get VXRS_HIGH\n");
+ pr_perror("Couldn't get VXRS_HIGH");
return -1;
}
fpregs->flags |= USER_FPREGS_VXRS;
@@ -242,7 +226,7 @@ int get_gs_cb(pid_t pid, user_fpregs_struct_t *fpregs)
pr_debug("GS_BC not set\n");
return 0;
}
- pr_perror("Couldn't get GS_BC\n");
+ pr_perror("Couldn't get GS_BC");
return -1;
}
fpregs->flags |= USER_GS_BC;
@@ -273,7 +257,7 @@ int get_ri_cb(pid_t pid, user_fpregs_struct_t *fpregs)
pr_debug("RI_CB not set\n");
return 0;
default:
- pr_perror("Couldn't get RI_CB\n");
+ pr_perror("Couldn't get RI_CB");
return -1;
}
}
@@ -309,31 +293,30 @@ static int s390_disable_ri_bit(pid_t pid, user_regs_struct_t *regs)
/*
* Prepare task registers for restart
*/
-int get_task_regs(pid_t pid, user_regs_struct_t *regs, save_regs_t save,
- void *arg, __maybe_unused unsigned long flags)
+int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs, save_regs_t save,
+ void *arg, __maybe_unused unsigned long flags)
{
- user_fpregs_struct_t fpregs;
struct iovec iov;
int rewind;
- print_user_regs_struct("get_task_regs", pid, regs);
+ print_user_regs_struct("compel_get_task_regs", pid, regs);
- memset(&fpregs, 0, sizeof(fpregs));
- iov.iov_base = &fpregs.prfpreg;
- iov.iov_len = sizeof(fpregs.prfpreg);
+ memset(fpregs, 0, sizeof(*fpregs));
+ iov.iov_base = &fpregs->prfpreg;
+ iov.iov_len = sizeof(fpregs->prfpreg);
if (ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov) < 0) {
pr_perror("Couldn't get floating-point registers");
return -1;
}
- if (get_vx_regs(pid, &fpregs)) {
+ if (get_vx_regs(pid, fpregs)) {
pr_perror("Couldn't get vector registers");
return -1;
}
- if (get_gs_cb(pid, &fpregs)) {
+ if (get_gs_cb(pid, fpregs)) {
pr_perror("Couldn't get guarded-storage");
return -1;
}
- if (get_ri_cb(pid, &fpregs)) {
+ if (get_ri_cb(pid, fpregs)) {
pr_perror("Couldn't get runtime-instrumentation");
return -1;
}
@@ -342,10 +325,10 @@ int get_task_regs(pid_t pid, user_regs_struct_t *regs, save_regs_t save,
* before we execute parasite code. Otherwise parasite operations
* would be recorded.
*/
- if (fpregs.flags & USER_RI_ON)
+ if (fpregs->flags & USER_RI_ON)
s390_disable_ri_bit(pid, regs);
- print_user_fpregs_struct("get_task_regs", pid, &fpregs);
+ print_user_fpregs_struct("compel_get_task_regs", pid, fpregs);
/* Check for system call restarting. */
if (regs->system_call) {
rewind = regs->system_call >> 16;
@@ -365,17 +348,72 @@ int get_task_regs(pid_t pid, user_regs_struct_t *regs, save_regs_t save,
}
}
/* Call save_task_regs() */
- return save(arg, regs, &fpregs);
+ return save(pid, arg, regs, fpregs);
+}
+
+int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
+{
+ struct iovec iov;
+ int ret = 0;
+
+ iov.iov_base = &ext_regs->prfpreg;
+ iov.iov_len = sizeof(ext_regs->prfpreg);
+ if (ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov) < 0) {
+ pr_perror("Couldn't set floating-point registers");
+ ret = -1;
+ }
+
+ if (ext_regs->flags & USER_FPREGS_VXRS) {
+ iov.iov_base = &ext_regs->vxrs_low;
+ iov.iov_len = sizeof(ext_regs->vxrs_low);
+ if (ptrace(PTRACE_SETREGSET, pid, NT_S390_VXRS_LOW, &iov) < 0) {
+ pr_perror("Couldn't set VXRS_LOW");
+ ret = -1;
+ }
+
+ iov.iov_base = &ext_regs->vxrs_high;
+ iov.iov_len = sizeof(ext_regs->vxrs_high);
+ if (ptrace(PTRACE_SETREGSET, pid, NT_S390_VXRS_HIGH, &iov) < 0) {
+ pr_perror("Couldn't set VXRS_HIGH");
+ ret = -1;
+ }
+ }
+
+ if (ext_regs->flags & USER_GS_CB) {
+ iov.iov_base = &ext_regs->gs_cb;
+ iov.iov_len = sizeof(ext_regs->gs_cb);
+ if (ptrace(PTRACE_SETREGSET, pid, NT_S390_GS_CB, &iov) < 0) {
+ pr_perror("Couldn't set GS_CB");
+ ret = -1;
+ }
+ iov.iov_base = &ext_regs->gs_bc;
+ iov.iov_len = sizeof(ext_regs->gs_bc);
+ if (ptrace(PTRACE_SETREGSET, pid, NT_S390_GS_BC, &iov) < 0) {
+ pr_perror("Couldn't set GS_BC");
+ ret = -1;
+ }
+ }
+
+ if (ext_regs->flags & USER_RI_CB) {
+ iov.iov_base = &ext_regs->ri_cb;
+ iov.iov_len = sizeof(ext_regs->ri_cb);
+ if (ptrace(PTRACE_SETREGSET, pid, NT_S390_RI_CB, &iov) < 0) {
+ pr_perror("Couldn't set RI_CB");
+ ret = -1;
+ }
+ }
+
+ return ret;
}
/*
* Injected syscall instruction
*/
const char code_syscall[] = {
- 0x0a, 0x00, /* sc 0 */
- 0x00, 0x01, /* S390_BREAKPOINT_U16 */
- 0x00, 0x01, /* S390_BREAKPOINT_U16 */
- 0x00, 0x01, /* S390_BREAKPOINT_U16 */
+ 0x0a, 0x00, /* sc 0 */
+ 0x00, 0x01, /* S390_BREAKPOINT_U16 */
+ 0x00, 0x01, /* S390_BREAKPOINT_U16 */
+ 0x00, 0x01, /* S390_BREAKPOINT_U16 */
};
static inline void __check_code_syscall(void)
@@ -387,19 +425,14 @@ static inline void __check_code_syscall(void)
/*
* Issue s390 system call
*/
-int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret,
- unsigned long arg1,
- unsigned long arg2,
- unsigned long arg3,
- unsigned long arg4,
- unsigned long arg5,
- unsigned long arg6)
+int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6)
{
user_regs_struct_t regs = ctl->orig.regs;
int err;
/* Load syscall number into %r1 */
- regs.prstatus.gprs[1] = (unsigned long) nr;
+ regs.prstatus.gprs[1] = (unsigned long)nr;
/* Load parameter registers %r2-%r7 */
regs.prstatus.gprs[2] = arg1;
regs.prstatus.gprs[3] = arg2;
@@ -408,7 +441,7 @@ int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret,
regs.prstatus.gprs[6] = arg5;
regs.prstatus.gprs[7] = arg6;
- err = compel_execute_syscall(ctl, &regs, (char *) code_syscall);
+ err = compel_execute_syscall(ctl, &regs, (char *)code_syscall);
/* Return code from system is in %r2 */
if (ret)
@@ -419,9 +452,7 @@ int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret,
/*
* Issue s390 mmap call
*/
-void *remote_mmap(struct parasite_ctl *ctl,
- void *addr, size_t length, int prot,
- int flags, int fd, off_t offset)
+void *remote_mmap(struct parasite_ctl *ctl, void *addr, size_t length, int prot, int flags, int fd, off_t offset)
{
void *where = (void *)ctl->ictx.syscall_ip + BUILTIN_SYSCALL_SIZE;
struct mmap_arg_struct arg_struct;
@@ -444,8 +475,7 @@ void *remote_mmap(struct parasite_ctl *ctl,
}
/* Do syscall */
- err = compel_syscall(ctl, __NR_mmap, &map, (unsigned long) where,
- 0, 0, 0, 0, 0);
+ err = compel_syscall(ctl, __NR_mmap, &map, (unsigned long)where, 0, 0, 0, 0, 0);
if (err < 0 || (long)map < 0)
map = 0;
@@ -453,8 +483,9 @@ void *remote_mmap(struct parasite_ctl *ctl,
if (ptrace_poke_area(pid, &arg_struct, where, sizeof(arg_struct))) {
pr_err("Can't restore mmap args (pid: %d)\n", pid);
if (map != 0) {
- compel_syscall(ctl, __NR_munmap, NULL, map,
- length, 0, 0, 0, 0);
+ err = compel_syscall(ctl, __NR_munmap, NULL, map, length, 0, 0, 0, 0);
+ if (err)
+ pr_err("Can't munmap %d\n", err);
map = 0;
}
}
@@ -465,14 +496,12 @@ void *remote_mmap(struct parasite_ctl *ctl,
/*
* Setup registers for parasite call
*/
-void parasite_setup_regs(unsigned long new_ip, void *stack,
- user_regs_struct_t *regs)
+void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs)
{
regs->prstatus.psw.addr = new_ip;
if (!stack)
return;
- regs->prstatus.gprs[15] = ((unsigned long) stack) -
- STACK_FRAME_OVERHEAD;
+ regs->prstatus.gprs[15] = ((unsigned long)stack) - STACK_FRAME_OVERHEAD;
}
/*
@@ -520,9 +549,7 @@ int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s)
long ret;
int err;
- err = compel_syscall(ctl, __NR_sigaltstack,
- &ret, 0, (unsigned long)&s->uc.uc_stack,
- 0, 0, 0, 0);
+ err = compel_syscall(ctl, __NR_sigaltstack, &ret, 0, (unsigned long)&s->uc.uc_stack, 0, 0, 0, 0);
return err ? err : ret;
}
@@ -596,9 +623,9 @@ enum kernel_ts_level {
};
/* See arch/s390/include/asm/processor.h */
-#define TASK_SIZE_LEVEL_3 0x40000000000UL /* 4 TB */
-#define TASK_SIZE_LEVEL_4 0x20000000000000UL /* 8 PB */
-#define TASK_SIZE_LEVEL_5 0xffffffffffffefffUL /* 16 EB - 0x1000 */
+#define TASK_SIZE_LEVEL_3 0x40000000000UL /* 4 TB */
+#define TASK_SIZE_LEVEL_4 0x20000000000000UL /* 8 PB */
+#define TASK_SIZE_LEVEL_5 0xffffffffffffefffUL /* 16 EB - 0x1000 */
/*
* Return detected kernel version regarding task size level
@@ -612,12 +639,12 @@ static enum kernel_ts_level get_kernel_ts_level(void)
/* Check for 5 levels */
if (criu_end_addr >= TASK_SIZE_LEVEL_4)
return KERNEL_TS_LEVEL_5;
- else if (munmap((void *) TASK_SIZE_LEVEL_4, 0x1000) == 0)
+ else if (munmap((void *)TASK_SIZE_LEVEL_4, 0x1000) == 0)
return KERNEL_TS_LEVEL_5;
if (criu_end_addr < TASK_SIZE_LEVEL_3) {
/* Check for 4 level kernel with fix */
- if (munmap((void *) TASK_SIZE_LEVEL_3, 0x1000) == 0)
+ if (munmap((void *)TASK_SIZE_LEVEL_3, 0x1000) == 0)
return KERNEL_TS_LEVEL_4_FIX_YES;
else
return KERNEL_TS_LEVEL_4_FIX_NO;
@@ -658,14 +685,6 @@ unsigned long compel_task_size(void)
/*
* Get task registers (overwrites weak function)
- *
- * We don't store floating point and vector registers here because we
- * assue that compel/pie code does not change them.
- *
- * For verification issue:
- *
- * $ objdump -S criu/pie/parasite.built-in.bin.o | grep "%f"
- * $ objdump -S criu/pie/restorer.built-in.bin.o | grep "%f"
*/
int ptrace_get_regs(int pid, user_regs_struct_t *regs)
{
diff --git a/compel/arch/x86/plugins/include/asm/prologue.h b/compel/arch/x86/plugins/include/asm/prologue.h
index 9d812eec9..c19ce54d7 100644
--- a/compel/arch/x86/plugins/include/asm/prologue.h
+++ b/compel/arch/x86/plugins/include/asm/prologue.h
@@ -9,17 +9,16 @@
#include
-#define sys_recv(sockfd, ubuf, size, flags) \
- sys_recvfrom(sockfd, ubuf, size, flags, NULL, NULL)
+#define sys_recv(sockfd, ubuf, size, flags) sys_recvfrom(sockfd, ubuf, size, flags, NULL, NULL)
typedef struct prologue_init_args {
- struct sockaddr_un ctl_sock_addr;
- unsigned int ctl_sock_addr_len;
+ struct sockaddr_un ctl_sock_addr;
+ unsigned int ctl_sock_addr_len;
- unsigned int arg_s;
- void *arg_p;
+ unsigned int arg_s;
+ void *arg_p;
- void *sigframe;
+ void *sigframe;
} prologue_init_args_t;
#endif /* __ASSEMBLY__ */
@@ -29,8 +28,8 @@ typedef struct prologue_init_args {
*
* FIXME It is rather should be taken from sigframe header.
*/
-#define PROLOGUE_SGFRAME_SIZE 4096
+#define PROLOGUE_SGFRAME_SIZE 4096
-#define PROLOGUE_INIT_ARGS_SIZE 1024
+#define PROLOGUE_INIT_ARGS_SIZE 1024
#endif /* __ASM_PROLOGUE_H__ */
diff --git a/compel/arch/x86/plugins/include/asm/syscall-types.h b/compel/arch/x86/plugins/include/asm/syscall-types.h
index 9874fd0be..6987aad16 100644
--- a/compel/arch/x86/plugins/include/asm/syscall-types.h
+++ b/compel/arch/x86/plugins/include/asm/syscall-types.h
@@ -8,12 +8,12 @@ typedef rt_signalfn_t *rt_sighandler_t;
typedef void rt_restorefn_t(void);
typedef rt_restorefn_t *rt_sigrestore_t;
-#define SA_RESTORER 0x04000000
+#define SA_RESTORER 0x04000000
-#define _KNSIG 64
-#define _NSIG_BPW 64
+#define _KNSIG 64
+#define _NSIG_BPW 64
-#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW)
+#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW)
/*
* Note: as k_rtsigset_t is the same size for 32-bit and 64-bit,
@@ -21,14 +21,14 @@ typedef rt_restorefn_t *rt_sigrestore_t;
* purpose if we ever going to support native 32-bit compilation.
*/
typedef struct {
- uint64_t sig[_KNSIG_WORDS];
+ uint64_t sig[_KNSIG_WORDS];
} k_rtsigset_t;
typedef struct {
- rt_sighandler_t rt_sa_handler;
- unsigned long rt_sa_flags;
- rt_sigrestore_t rt_sa_restorer;
- k_rtsigset_t rt_sa_mask;
+ rt_sighandler_t rt_sa_handler;
+ unsigned long rt_sa_flags;
+ rt_sigrestore_t rt_sa_restorer;
+ k_rtsigset_t rt_sa_mask;
} rt_sigaction_t;
/*
@@ -37,24 +37,24 @@ typedef struct {
* with unaligned rt_sa_mask.
*/
typedef struct __attribute__((packed)) {
- unsigned int rt_sa_handler;
- unsigned int rt_sa_flags;
- unsigned int rt_sa_restorer;
- k_rtsigset_t rt_sa_mask;
+ unsigned int rt_sa_handler;
+ unsigned int rt_sa_flags;
+ unsigned int rt_sa_restorer;
+ k_rtsigset_t rt_sa_mask;
} rt_sigaction_t_compat;
/* Types for set_thread_area, get_thread_area syscalls */
typedef struct {
- unsigned int entry_number;
- unsigned int base_addr;
- unsigned int limit;
- unsigned int seg_32bit:1;
- unsigned int contents:2;
- unsigned int read_exec_only:1;
- unsigned int limit_in_pages:1;
- unsigned int seg_not_present:1;
- unsigned int useable:1;
- unsigned int lm:1;
+ unsigned int entry_number;
+ unsigned int base_addr;
+ unsigned int limit;
+ unsigned int seg_32bit : 1;
+ unsigned int contents : 2;
+ unsigned int read_exec_only : 1;
+ unsigned int limit_in_pages : 1;
+ unsigned int seg_not_present : 1;
+ unsigned int usable : 1;
+ unsigned int lm : 1;
} user_desc_t;
#endif /* COMPEL_ARCH_SYSCALL_TYPES_H__ */
diff --git a/compel/arch/x86/plugins/std/parasite-head.S b/compel/arch/x86/plugins/std/parasite-head.S
index a988de9d4..42cad4808 100644
--- a/compel/arch/x86/plugins/std/parasite-head.S
+++ b/compel/arch/x86/plugins/std/parasite-head.S
@@ -6,16 +6,6 @@
# error 64-bit parasite should compile with CONFIG_X86_64
#endif
-.macro PARASITE_ENTRY num
- subq $16, %rsp
- andq $~15, %rsp
- pushq $\num
- movq %rsp, %rbp
- movl __export_parasite_cmd(%rip), %edi
- leaq __export_parasite_args(%rip), %rsi
- call parasite_service
-.endm
-
#ifdef CONFIG_COMPAT
.code32
ENTRY(__export_parasite_head_start_compat)
@@ -23,9 +13,11 @@ ENTRY(__export_parasite_head_start_compat)
jmp $__USER_CS,$1f
1:
.code64
- PARASITE_ENTRY 0
+ call parasite_service
pushq $__USER32_CS
- pushq $2f
+ xor %r11, %r11
+ movl $2f, %r11d
+ pushq %r11
lretq
2:
.code32
@@ -42,11 +34,21 @@ END(__export_parasite_head_start_compat)
.code64
#endif
+/*
+ * When parasite_service() runs in the daemon mode it will return the stack
+ * pointer for the sigreturn frame in %rax and we call sigreturn directly
+ * from here.
+ * Since a valid stack pointer is positive, it is safe to presume that
+ * return value <= 0 means that parasite_service() called parasite_trap_cmd()
+ * in non-daemon mode, and the parasite should stop at int3.
+ */
ENTRY(__export_parasite_head_start)
- PARASITE_ENTRY 0
+ call parasite_service
+ cmp $0, %rax
+ jle 1f
+ movq %rax, %rsp
+ movq $15, %rax
+ syscall
+1:
int $0x03
END(__export_parasite_head_start)
-
-.align 8
-GLOBAL(__export_parasite_cmd)
- .long 0
diff --git a/compel/arch/x86/plugins/std/prologue.S b/compel/arch/x86/plugins/std/prologue.S
deleted file mode 100644
index 79ad1f6f2..000000000
--- a/compel/arch/x86/plugins/std/prologue.S
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "common/asm/linkage.h"
-#include "asm/prologue.h"
-
-#include "uapi/std/syscall-codes.h"
-
- .section .compel.prologue.text, "ax"
-ENTRY(__export_std_prologue_start)
- push %rsp
-
- leaq __export_std_prologue_init_args(%rip), %rdi
- movq __export_std_plugin_begin(%rip), %rsi
- movq __export_std_plugin_size(%rip), %rdx
- call __export_std_compel_start
-
-do_rt_sigreturn:
- leaq __export_std_prologue_sigframe(%rip), %rax
- addq $8, %rax
- movq %rax, %rsp # we can't use sys_rt_sigreturn here
- mov $__NR_rt_sigreturn, %eax # because we're adjusting stack
- syscall
-
-GLOBAL(__export_std_prologue_init_args)
- .space PROLOGUE_INIT_ARGS_SIZE, 0
-
-GLOBAL(__export_std_plugin_begin)
- .space 8, 0
-GLOBAL(__export_std_plugin_size)
- .space 8, 0
-
- .align 64
-GLOBAL(__export_std_prologue_sigframe)
- .space PROLOGUE_SGFRAME_SIZE, 0
-END(__export_std_prologue_start)
diff --git a/compel/arch/x86/plugins/std/syscalls/Makefile.syscalls b/compel/arch/x86/plugins/std/syscalls/Makefile.syscalls
index 4ba4b56c8..62c25f3e0 100644
--- a/compel/arch/x86/plugins/std/syscalls/Makefile.syscalls
+++ b/compel/arch/x86/plugins/std/syscalls/Makefile.syscalls
@@ -39,6 +39,10 @@ $(sys-proto): $(sys-def) $(sys-proto-types)
$(Q) echo "/* Autogenerated, don't edit */" > $$@
$(Q) echo "#ifndef ASM_SYSCALL_PROTO_H_$(1)__" >> $$@
$(Q) echo "#define ASM_SYSCALL_PROTO_H_$(1)__" >> $$@
+ $(Q) echo "/* musl defines loff_t as off_t */" >> $$@
+ $(Q) echo '#ifndef loff_t' >> $$@
+ $(Q) echo '#define loff_t off_t' >> $$@
+ $(Q) echo '#endif' >> $$@
$(Q) echo '#include ' >> $$@
$(Q) echo '#include ' >> $$@
ifeq ($(1),32)
@@ -71,6 +75,10 @@ $(sys-codes-generic): $(PLUGIN_ARCH_DIR)/std/syscalls/syscall_32.tbl $(sys-proto
$(Q) echo "/* Autogenerated, don't edit */" > $@
$(Q) echo "#ifndef __ASM_CR_SYSCALL_CODES_H__" >> $@
$(Q) echo "#define __ASM_CR_SYSCALL_CODES_H__" >> $@
+ $(Q) echo "/* musl defines loff_t as off_t */" >> $@
+ $(Q) echo '#ifndef loff_t' >> $@
+ $(Q) echo '#define loff_t off_t' >> $@
+ $(Q) echo '#endif' >> $@
$(Q) echo '#include ' >> $@
$(Q) cat $< | awk '/^__NR/{NR32=$$1; \
sub("^__NR", "__NR32", NR32); \
diff --git a/compel/arch/x86/plugins/std/syscalls/syscall32.c b/compel/arch/x86/plugins/std/syscalls/syscall32.c
index e172cacff..d09fd38c7 100644
--- a/compel/arch/x86/plugins/std/syscalls/syscall32.c
+++ b/compel/arch/x86/plugins/std/syscalls/syscall32.c
@@ -1,16 +1,16 @@
#include "asm/types.h"
#include "syscall-32.h"
-#define SYS_SOCKET 1 /* sys_socket(2) */
-#define SYS_BIND 2 /* sys_bind(2) */
-#define SYS_CONNECT 3 /* sys_connect(2) */
-#define SYS_SENDTO 11 /* sys_sendto(2) */
-#define SYS_RECVFROM 12 /* sys_recvfrom(2) */
-#define SYS_SHUTDOWN 13 /* sys_shutdown(2) */
-#define SYS_SETSOCKOPT 14 /* sys_setsockopt(2) */
-#define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */
-#define SYS_SENDMSG 16 /* sys_sendmsg(2) */
-#define SYS_RECVMSG 17 /* sys_recvmsg(2) */
+#define SYS_SOCKET 1 /* sys_socket(2) */
+#define SYS_BIND 2 /* sys_bind(2) */
+#define SYS_CONNECT 3 /* sys_connect(2) */
+#define SYS_SENDTO 11 /* sys_sendto(2) */
+#define SYS_RECVFROM 12 /* sys_recvfrom(2) */
+#define SYS_SHUTDOWN 13 /* sys_shutdown(2) */
+#define SYS_SETSOCKOPT 14 /* sys_setsockopt(2) */
+#define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */
+#define SYS_SENDMSG 16 /* sys_sendmsg(2) */
+#define SYS_RECVMSG 17 /* sys_recvmsg(2) */
long sys_socket(int domain, int type, int protocol)
{
@@ -20,59 +20,61 @@ long sys_socket(int domain, int type, int protocol)
long sys_connect(int sockfd, struct sockaddr *addr, int addrlen)
{
- uint32_t a[] = {(uint32_t)sockfd, (uint32_t)addr, (uint32_t)addrlen};
+ uint32_t a[] = { (uint32_t)sockfd, (uint32_t)addr, (uint32_t)addrlen };
return sys_socketcall(SYS_CONNECT, (unsigned long *)a);
}
long sys_sendto(int sockfd, void *buff, size_t len, unsigned int flags, struct sockaddr *addr, int addr_len)
{
- uint32_t a[] = {(uint32_t)sockfd, (uint32_t)buff, (uint32_t)len, (uint32_t)flags, (uint32_t)addr, (uint32_t)addr_len};
+ uint32_t a[] = { (uint32_t)sockfd, (uint32_t)buff, (uint32_t)len,
+ (uint32_t)flags, (uint32_t)addr, (uint32_t)addr_len };
return sys_socketcall(SYS_SENDTO, (unsigned long *)a);
}
long sys_recvfrom(int sockfd, void *ubuf, size_t size, unsigned int flags, struct sockaddr *addr, int *addr_len)
{
- uint32_t a[] = {(uint32_t)sockfd, (uint32_t)ubuf, (uint32_t)size, (uint32_t)flags, (uint32_t)addr, (uint32_t)addr_len};
+ uint32_t a[] = { (uint32_t)sockfd, (uint32_t)ubuf, (uint32_t)size,
+ (uint32_t)flags, (uint32_t)addr, (uint32_t)addr_len };
return sys_socketcall(SYS_RECVFROM, (unsigned long *)a);
}
long sys_sendmsg(int sockfd, const struct msghdr *msg, int flags)
{
- uint32_t a[] = {(uint32_t)sockfd, (uint32_t)msg, (uint32_t)flags};
+ uint32_t a[] = { (uint32_t)sockfd, (uint32_t)msg, (uint32_t)flags };
return sys_socketcall(SYS_SENDMSG, (unsigned long *)a);
}
long sys_recvmsg(int sockfd, struct msghdr *msg, int flags)
{
- uint32_t a[] = {(uint32_t)sockfd, (uint32_t)msg, (uint32_t)flags};
+ uint32_t a[] = { (uint32_t)sockfd, (uint32_t)msg, (uint32_t)flags };
return sys_socketcall(SYS_RECVMSG, (unsigned long *)a);
}
long sys_shutdown(int sockfd, int how)
{
- uint32_t a[] = {(uint32_t)sockfd, (uint32_t)how};
+ uint32_t a[] = { (uint32_t)sockfd, (uint32_t)how };
return sys_socketcall(SYS_SHUTDOWN, (unsigned long *)a);
}
long sys_bind(int sockfd, const struct sockaddr *addr, int addrlen)
{
- uint32_t a[] = {(uint32_t)sockfd, (uint32_t)addr, (uint32_t)addrlen};
+ uint32_t a[] = { (uint32_t)sockfd, (uint32_t)addr, (uint32_t)addrlen };
return sys_socketcall(SYS_BIND, (unsigned long *)a);
}
long sys_setsockopt(int sockfd, int level, int optname, const void *optval, unsigned int optlen)
{
- uint32_t a[] = {(uint32_t)sockfd, (uint32_t)level, (uint32_t)optname, (uint32_t)optval, (uint32_t)optlen};
+ uint32_t a[] = { (uint32_t)sockfd, (uint32_t)level, (uint32_t)optname, (uint32_t)optval, (uint32_t)optlen };
return sys_socketcall(SYS_SETSOCKOPT, (unsigned long *)a);
}
long sys_getsockopt(int sockfd, int level, int optname, const void *optval, unsigned int *optlen)
{
- uint32_t a[] = {(uint32_t)sockfd, (uint32_t)level, (uint32_t)optname, (uint32_t)optval, (uint32_t)optlen};
+ uint32_t a[] = { (uint32_t)sockfd, (uint32_t)level, (uint32_t)optname, (uint32_t)optval, (uint32_t)optlen };
return sys_socketcall(SYS_GETSOCKOPT, (unsigned long *)a);
}
-#define SHMAT 21
+#define SHMAT 21
long sys_shmat(int shmid, void *shmaddr, int shmflag)
{
diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
index 7903ab150..cc23dc3f3 100644
--- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
+++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
@@ -96,3 +96,15 @@ __NR_seccomp 354 sys_seccomp (unsigned int op, unsigned int flags, const char
__NR_memfd_create 356 sys_memfd_create (const char *name, unsigned int flags)
__NR_userfaultfd 374 sys_userfaultfd (int flags)
__NR_ppoll 309 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize)
+__NR_open_tree 428 sys_open_tree (int dirfd, const char *pathname, unsigned int flags)
+__NR_move_mount 429 sys_move_mount (int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, int flags)
+__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags)
+__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux)
+__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags)
+__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
+__NR_close_range 436 sys_close_range (unsigned int fd, unsigned int max_fd, unsigned int flags)
+__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
+__NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size)
+__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
+__NR_rseq 386 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+__NR_membarrier 375 sys_membarrier (int cmd, unsigned int flags, int cpu_id)
diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
index 4ac9164ea..8c3620c2a 100644
--- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
+++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
@@ -85,7 +85,7 @@ __NR_sys_timer_settime 223 sys_timer_settime (kernel_timer_t timer_id, int fla
__NR_sys_timer_gettime 224 sys_timer_gettime (int timer_id, const struct itimerspec *setting)
__NR_sys_timer_getoverrun 225 sys_timer_getoverrun (int timer_id)
__NR_sys_timer_delete 226 sys_timer_delete (kernel_timer_t timer_id)
-__NR_clock_gettime 228 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp)
+__NR_clock_gettime 228 sys_clock_gettime (clockid_t which_clock, struct timespec *tp)
__NR_exit_group 231 sys_exit_group (int error_code)
__NR_openat 257 sys_openat (int dfd, const char *filename, int flags, int mode)
__NR_waitid 247 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
@@ -107,3 +107,16 @@ __NR_kcmp 312 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1
__NR_memfd_create 319 sys_memfd_create (const char *name, unsigned int flags)
__NR_userfaultfd 323 sys_userfaultfd (int flags)
__NR_ppoll 271 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize)
+__NR_open_tree 428 sys_open_tree (int dirfd, const char *pathname, unsigned int flags)
+__NR_move_mount 429 sys_move_mount (int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, int flags)
+__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags)
+__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux)
+__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags)
+__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
+__NR_close_range 436 sys_close_range (unsigned int fd, unsigned int max_fd, unsigned int flags)
+__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
+__NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size)
+__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
+__NR_rseq 334 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
+__NR_membarrier 324 sys_membarrier (int cmd, unsigned int flags, int cpu_id)
+__NR_map_shadow_stack 453 sys_map_shadow_stack (unsigned long addr, unsigned long size, unsigned int flags)
diff --git a/compel/arch/x86/scripts/compel-pack-compat.lds.S b/compel/arch/x86/scripts/compel-pack-compat.lds.S
index ff9c2c6b2..2d907a4a7 100644
--- a/compel/arch/x86/scripts/compel-pack-compat.lds.S
+++ b/compel/arch/x86/scripts/compel-pack-compat.lds.S
@@ -34,8 +34,4 @@ SECTIONS
*(.group*)
*(.eh_frame*)
}
-
-/* Parasite args should have 4 bytes align, as we have futex inside. */
-. = ALIGN(4);
-__export_parasite_args = .;
}
diff --git a/compel/arch/x86/scripts/compel-pack.lds.S b/compel/arch/x86/scripts/compel-pack.lds.S
index 0c936f84d..44e705e29 100644
--- a/compel/arch/x86/scripts/compel-pack.lds.S
+++ b/compel/arch/x86/scripts/compel-pack.lds.S
@@ -13,7 +13,7 @@ SECTIONS
*(.compel.init)
}
- .data : {
+ .data : ALIGN(0x1000) {
*(.data*)
*(.bss*)
}
@@ -34,8 +34,4 @@ SECTIONS
*(.group*)
*(.eh_frame*)
}
-
-/* Parasite args should have 4 bytes align, as we have futex inside. */
-. = ALIGN(4);
-__export_parasite_args = .;
}
diff --git a/compel/arch/x86/src/lib/cpu.c b/compel/arch/x86/src/lib/cpu.c
index 617512167..f57fb3152 100644
--- a/compel/arch/x86/src/lib/cpu.c
+++ b/compel/arch/x86/src/lib/cpu.c
@@ -8,7 +8,7 @@
#include "log.h"
#include "common/bug.h"
-#undef LOG_PREFIX
+#undef LOG_PREFIX
#define LOG_PREFIX "cpu: "
static compel_cpuinfo_t rt_info;
@@ -29,32 +29,24 @@ static void fetch_rt_cpuinfo(void)
* to save/restore PT state in Linux.
*/
-static const char * const xfeature_names[] = {
- "x87 floating point registers" ,
- "SSE registers" ,
- "AVX registers" ,
- "MPX bounds registers" ,
- "MPX CSR" ,
- "AVX-512 opmask" ,
- "AVX-512 Hi256" ,
- "AVX-512 ZMM_Hi256" ,
- "Processor Trace" ,
+static const char *const xfeature_names[] = {
+ "x87 floating point registers",
+ "SSE registers",
+ "AVX registers",
+ "MPX bounds registers",
+ "MPX CSR",
+ "AVX-512 opmask",
+ "AVX-512 Hi256",
+ "AVX-512 ZMM_Hi256",
+ "Processor Trace",
"Protection Keys User registers",
- "Hardware Duty Cycling" ,
+ "Hardware Duty Cycling",
};
static short xsave_cpuid_features[] = {
- X86_FEATURE_FPU,
- X86_FEATURE_XMM,
- X86_FEATURE_AVX,
- X86_FEATURE_MPX,
- X86_FEATURE_MPX,
- X86_FEATURE_AVX512F,
- X86_FEATURE_AVX512F,
- X86_FEATURE_AVX512F,
- X86_FEATURE_INTEL_PT,
- X86_FEATURE_PKU,
- X86_FEATURE_HDC,
+ X86_FEATURE_FPU, X86_FEATURE_XMM, X86_FEATURE_AVX, X86_FEATURE_MPX,
+ X86_FEATURE_MPX, X86_FEATURE_AVX512F, X86_FEATURE_AVX512F, X86_FEATURE_AVX512F,
+ X86_FEATURE_INTEL_PT, X86_FEATURE_PKU, X86_FEATURE_HDC,
};
void compel_set_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
@@ -89,8 +81,7 @@ static int compel_fpuid(compel_cpuinfo_t *c)
uint32_t eax, ebx, ecx, edx;
size_t i;
- BUILD_BUG_ON(ARRAY_SIZE(xsave_cpuid_features) !=
- ARRAY_SIZE(xfeature_names));
+ BUILD_BUG_ON(ARRAY_SIZE(xsave_cpuid_features) != ARRAY_SIZE(xfeature_names));
if (!compel_test_cpu_cap(c, X86_FEATURE_FPU)) {
pr_err("fpu: No FPU detected\n");
@@ -98,9 +89,7 @@ static int compel_fpuid(compel_cpuinfo_t *c)
}
if (!compel_test_cpu_cap(c, X86_FEATURE_XSAVE)) {
- pr_info("fpu: x87 FPU will use %s\n",
- compel_test_cpu_cap(c, X86_FEATURE_FXSR) ?
- "FXSAVE" : "FSAVE");
+ pr_info("fpu: x87 FPU will use %s\n", compel_test_cpu_cap(c, X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
return 0;
}
@@ -125,7 +114,7 @@ static int compel_fpuid(compel_cpuinfo_t *c)
c->xfeatures_mask &= ~(1 << i);
}
- c->xfeatures_mask &= XCNTXT_MASK;
+ c->xfeatures_mask &= XFEATURE_MASK_USER;
c->xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;
/*
@@ -140,19 +129,18 @@ static int compel_fpuid(compel_cpuinfo_t *c)
c->xsaves_size = ebx;
pr_debug("fpu: xfeatures_mask 0x%llx xsave_size %u xsave_size_max %u xsaves_size %u\n",
- (unsigned long long)c->xfeatures_mask,
- c->xsave_size, c->xsave_size_max, c->xsaves_size);
+ (unsigned long long)c->xfeatures_mask, c->xsave_size, c->xsave_size_max, c->xsaves_size);
if (c->xsave_size_max > sizeof(struct xsave_struct))
- pr_warn_once("fpu: max xsave frame exceed xsave_struct (%u %u)\n",
- c->xsave_size_max, (unsigned)sizeof(struct xsave_struct));
+ pr_warn_once("fpu: max xsave frame exceed xsave_struct (%u %u)\n", c->xsave_size_max,
+ (unsigned)sizeof(struct xsave_struct));
memset(c->xstate_offsets, 0xff, sizeof(c->xstate_offsets));
memset(c->xstate_sizes, 0xff, sizeof(c->xstate_sizes));
memset(c->xstate_comp_offsets, 0xff, sizeof(c->xstate_comp_offsets));
memset(c->xstate_comp_sizes, 0xff, sizeof(c->xstate_comp_sizes));
- /* start at the beginnning of the "extended state" */
+ /* start at the beginning of the "extended state" */
last_good_offset = offsetof(struct xsave_struct, extended_state_area);
/*
@@ -160,10 +148,10 @@ static int compel_fpuid(compel_cpuinfo_t *c)
* in the fixed offsets in the xsave area in either compacted form
* or standard form.
*/
- c->xstate_offsets[0] = 0;
- c->xstate_sizes[0] = offsetof(struct i387_fxsave_struct, xmm_space);
- c->xstate_offsets[1] = c->xstate_sizes[0];
- c->xstate_sizes[1] = FIELD_SIZEOF(struct i387_fxsave_struct, xmm_space);
+ c->xstate_offsets[0] = 0;
+ c->xstate_sizes[0] = offsetof(struct i387_fxsave_struct, xmm_space);
+ c->xstate_offsets[1] = c->xstate_sizes[0];
+ c->xstate_sizes[1] = FIELD_SIZEOF(struct i387_fxsave_struct, xmm_space);
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
if (!(c->xfeatures_mask & (1UL << i)))
@@ -189,8 +177,7 @@ static int compel_fpuid(compel_cpuinfo_t *c)
* highest offset in the buffer. Ensure it does.
*/
if (last_good_offset > c->xstate_offsets[i])
- pr_warn_once("fpu: misordered xstate %d %d\n",
- last_good_offset, c->xstate_offsets[i]);
+ pr_warn_once("fpu: misordered xstate %d %d\n", last_good_offset, c->xstate_offsets[i]);
last_good_offset = c->xstate_offsets[i];
}
@@ -198,10 +185,10 @@ static int compel_fpuid(compel_cpuinfo_t *c)
BUILD_BUG_ON(sizeof(c->xstate_offsets) != sizeof(c->xstate_sizes));
BUILD_BUG_ON(sizeof(c->xstate_comp_offsets) != sizeof(c->xstate_comp_sizes));
- c->xstate_comp_offsets[0] = 0;
- c->xstate_comp_sizes[0] = offsetof(struct i387_fxsave_struct, xmm_space);
- c->xstate_comp_offsets[1] = c->xstate_comp_sizes[0];
- c->xstate_comp_sizes[1] = FIELD_SIZEOF(struct i387_fxsave_struct, xmm_space);
+ c->xstate_comp_offsets[0] = 0;
+ c->xstate_comp_sizes[0] = offsetof(struct i387_fxsave_struct, xmm_space);
+ c->xstate_comp_offsets[1] = c->xstate_comp_sizes[0];
+ c->xstate_comp_sizes[1] = FIELD_SIZEOF(struct i387_fxsave_struct, xmm_space);
if (!compel_test_cpu_cap(c, X86_FEATURE_XSAVES)) {
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
@@ -211,8 +198,7 @@ static int compel_fpuid(compel_cpuinfo_t *c)
}
}
} else {
- c->xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] =
- FXSAVE_SIZE + XSAVE_HDR_SIZE;
+ c->xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
if ((c->xfeatures_mask & (1UL << i)))
@@ -221,8 +207,7 @@ static int compel_fpuid(compel_cpuinfo_t *c)
c->xstate_comp_sizes[i] = 0;
if (i > FIRST_EXTENDED_XFEATURE) {
- c->xstate_comp_offsets[i] = c->xstate_comp_offsets[i-1]
- + c->xstate_comp_sizes[i-1];
+ c->xstate_comp_offsets[i] = c->xstate_comp_offsets[i - 1] + c->xstate_comp_sizes[i - 1];
/*
* The value returned by ECX[1] indicates the alignment
@@ -240,9 +225,9 @@ static int compel_fpuid(compel_cpuinfo_t *c)
for (i = 0; i < ARRAY_SIZE(c->xstate_offsets); i++) {
if (!(c->xfeatures_mask & (1UL << i)))
continue;
- pr_debug("fpu: %-32s xstate_offsets %6d / %-6d xstate_sizes %6d / %-6d\n",
- xfeature_names[i], c->xstate_offsets[i], c->xstate_comp_offsets[i],
- c->xstate_sizes[i], c->xstate_comp_sizes[i]);
+ pr_debug("fpu: %-32s xstate_offsets %6d / %-6d xstate_sizes %6d / %-6d\n", xfeature_names[i],
+ c->xstate_offsets[i], c->xstate_comp_offsets[i], c->xstate_sizes[i],
+ c->xstate_comp_sizes[i]);
}
}
@@ -261,20 +246,15 @@ int compel_cpuid(compel_cpuinfo_t *c)
*/
/* Get vendor name */
- cpuid(0x00000000,
- (unsigned int *)&c->cpuid_level,
- (unsigned int *)&c->x86_vendor_id[0],
- (unsigned int *)&c->x86_vendor_id[8],
- (unsigned int *)&c->x86_vendor_id[4]);
+ cpuid(0x00000000, (unsigned int *)&c->cpuid_level, (unsigned int *)&c->x86_vendor_id[0],
+ (unsigned int *)&c->x86_vendor_id[8], (unsigned int *)&c->x86_vendor_id[4]);
if (!strcmp(c->x86_vendor_id, "GenuineIntel")) {
c->x86_vendor = X86_VENDOR_INTEL;
- } else if (!strcmp(c->x86_vendor_id, "AuthenticAMD") ||
- !strcmp(c->x86_vendor_id, "HygonGenuine")) {
+ } else if (!strcmp(c->x86_vendor_id, "AuthenticAMD") || !strcmp(c->x86_vendor_id, "HygonGenuine")) {
c->x86_vendor = X86_VENDOR_AMD;
} else {
- pr_err("Unsupported CPU vendor %s\n",
- c->x86_vendor_id);
+ pr_err("Unsupported CPU vendor %s\n", c->x86_vendor_id);
return -1;
}
@@ -369,7 +349,7 @@ int compel_cpuid(compel_cpuinfo_t *c)
while (*p)
*q++ = *p++;
while (q <= &c->x86_model_id[48])
- *q++ = '\0'; /* Zero-pad the rest */
+ *q++ = '\0'; /* Zero-pad the rest */
}
}
@@ -440,8 +420,7 @@ int compel_cpuid(compel_cpuinfo_t *c)
break;
}
- pr_debug("x86_family %u x86_vendor_id %s x86_model_id %s\n",
- c->x86_family, c->x86_vendor_id, c->x86_model_id);
+ pr_debug("x86_family %u x86_vendor_id %s x86_model_id %s\n", c->x86_family, c->x86_vendor_id, c->x86_model_id);
return compel_fpuid(c);
}
@@ -461,8 +440,7 @@ bool compel_fpu_has_feature(unsigned int feature)
uint32_t compel_fpu_feature_size(unsigned int feature)
{
fetch_rt_cpuinfo();
- if (feature >= FIRST_EXTENDED_XFEATURE &&
- feature < XFEATURE_MAX)
+ if (feature >= FIRST_EXTENDED_XFEATURE && feature < XFEATURE_MAX)
return rt_info.xstate_sizes[feature];
return 0;
}
@@ -470,8 +448,7 @@ uint32_t compel_fpu_feature_size(unsigned int feature)
uint32_t compel_fpu_feature_offset(unsigned int feature)
{
fetch_rt_cpuinfo();
- if (feature >= FIRST_EXTENDED_XFEATURE &&
- feature < XFEATURE_MAX)
+ if (feature >= FIRST_EXTENDED_XFEATURE && feature < XFEATURE_MAX)
return rt_info.xstate_offsets[feature];
return 0;
}
diff --git a/compel/arch/x86/src/lib/handle-elf.c b/compel/arch/x86/src/lib/handle-elf.c
index 62fb28f49..78b23f28a 100644
--- a/compel/arch/x86/src/lib/handle-elf.c
+++ b/compel/arch/x86/src/lib/handle-elf.c
@@ -1,14 +1,12 @@
#include
-
-#include "uapi/compel.h"
+#include
#include "handle-elf.h"
#include "piegen.h"
#include "log.h"
-static const unsigned char __maybe_unused
-elf_ident_64_le[EI_NIDENT] = {
- 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00,
+static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = {
+ 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
diff --git a/compel/arch/x86/src/lib/include/cpu.h b/compel/arch/x86/src/lib/include/cpu.h
index 60b7d24d4..de3b0a0ff 100644
--- a/compel/arch/x86/src/lib/include/cpu.h
+++ b/compel/arch/x86/src/lib/include/cpu.h
@@ -1,31 +1,21 @@
#ifndef __COMPEL_ASM_CPU_H__
#define __COMPEL_ASM_CPU_H__
-static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
+static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
{
/* ecx is often an input as well as an output. */
- asm volatile("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx)
- : "memory");
+ asm volatile("cpuid" : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) : "0"(*eax), "2"(*ecx) : "memory");
}
-static inline void cpuid(unsigned int op,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
+static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
{
*eax = op;
*ecx = 0;
native_cpuid(eax, ebx, ecx, edx);
}
-static inline void cpuid_count(unsigned int op, int count,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
+static inline void cpuid_count(unsigned int op, int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
+ unsigned int *edx)
{
*eax = op;
*ecx = count;
diff --git a/compel/arch/x86/src/lib/include/handle-elf.h b/compel/arch/x86/src/lib/include/handle-elf.h
index e68fe3bcf..f3150f3f1 100644
--- a/compel/arch/x86/src/lib/include/handle-elf.h
+++ b/compel/arch/x86/src/lib/include/handle-elf.h
@@ -6,15 +6,15 @@
#define ELF_X86_64
#ifndef R_X86_64_GOTPCRELX
-# define R_X86_64_GOTPCRELX 41
+#define R_X86_64_GOTPCRELX 41
#endif
#ifndef R_X86_64_REX_GOTPCRELX
-# define R_X86_64_REX_GOTPCRELX 42
+#define R_X86_64_REX_GOTPCRELX 42
#endif
-#define __handle_elf handle_elf_x86_64
-#define arch_is_machine_supported(e_machine) (e_machine == EM_X86_64)
+#define __handle_elf handle_elf_x86_64
+#define arch_is_machine_supported(e_machine) (e_machine == EM_X86_64)
extern int handle_elf_x86_32(void *mem, size_t size);
extern int handle_elf_x86_64(void *mem, size_t size);
diff --git a/compel/arch/x86/src/lib/include/syscall.h b/compel/arch/x86/src/lib/include/syscall.h
index 9af1b1f99..a1b742b11 100644
--- a/compel/arch/x86/src/lib/include/syscall.h
+++ b/compel/arch/x86/src/lib/include/syscall.h
@@ -1,6 +1,6 @@
#ifndef __COMPEL_SYSCALL_H__
#define __COMPEL_SYSCALL_H__
-#define __NR(syscall, compat) ((compat) ? __NR32_##syscall : __NR_##syscall)
+#define __NR(syscall, compat) ((compat) ? __NR32_##syscall : __NR_##syscall)
/*
* For x86_32 __NR_mmap inside the kernel represents old_mmap system
diff --git a/compel/arch/x86/src/lib/include/uapi/asm/cpu.h b/compel/arch/x86/src/lib/include/uapi/asm/cpu.h
index bb1914da4..11c50e0e5 100644
--- a/compel/arch/x86/src/lib/include/uapi/asm/cpu.h
+++ b/compel/arch/x86/src/lib/include/uapi/asm/cpu.h
@@ -15,334 +15,335 @@
* to keep it here, since it's an ABI now.
*/
enum cpuid_leafs {
- CPUID_1_EDX = 0,
- CPUID_8000_0001_EDX = 1,
- CPUID_8086_0001_EDX = 2,
- CPUID_LNX_1 = 3,
- CPUID_1_ECX = 4,
- CPUID_C000_0001_EDX = 5,
- CPUID_8000_0001_ECX = 6,
- CPUID_LNX_2 = 7,
- CPUID_LNX_3 = 8,
- CPUID_7_0_EBX = 9,
- CPUID_D_1_EAX = 10,
- CPUID_7_0_ECX = 11,
- CPUID_F_1_EDX = 12,
- CPUID_8000_0008_EBX = 13,
- CPUID_6_EAX = 14,
- CPUID_8000_000A_EDX = 15,
- CPUID_F_0_EDX = 16,
- CPUID_8000_0007_EBX = 17,
- CPUID_7_0_EDX = 18,
+ CPUID_1_EDX = 0,
+ CPUID_8000_0001_EDX = 1,
+ CPUID_8086_0001_EDX = 2,
+ CPUID_LNX_1 = 3,
+ CPUID_1_ECX = 4,
+ CPUID_C000_0001_EDX = 5,
+ CPUID_8000_0001_ECX = 6,
+ CPUID_LNX_2 = 7,
+ CPUID_LNX_3 = 8,
+ CPUID_7_0_EBX = 9,
+ CPUID_D_1_EAX = 10,
+ CPUID_7_0_ECX = 11,
+ CPUID_F_1_EDX = 12,
+ CPUID_8000_0008_EBX = 13,
+ CPUID_6_EAX = 14,
+ CPUID_8000_000A_EDX = 15,
+ CPUID_F_0_EDX = 16,
+ CPUID_8000_0007_EBX = 17,
+ CPUID_7_0_EDX = 18,
};
-#define NCAPINTS_V1 12
-#define NCAPINTS_V2 19
+#define NCAPINTS_V1 12
+#define NCAPINTS_V2 19
-#define NCAPINTS (NCAPINTS_V2) /* N 32-bit words worth of info */
-#define NCAPINTS_BITS (NCAPINTS * 32)
+#define NCAPINTS (NCAPINTS_V2) /* N 32-bit words worth of info */
+#define NCAPINTS_BITS (NCAPINTS * 32)
/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
-#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV (0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN (0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH (0*32+19) /* CLFLUSH instruction */
-#define X86_FEATURE_DS (0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
-#define X86_FEATURE_XMM (0*32+25) /* "sse" */
-#define X86_FEATURE_XMM2 (0*32+26) /* "sse2" */
-#define X86_FEATURE_SELFSNOOP (0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC (0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE (0*32+31) /* Pending Break Enable */
+#define X86_FEATURE_FPU (0 * 32 + 0) /* Onboard FPU */
+#define X86_FEATURE_VME (0 * 32 + 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE (0 * 32 + 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE (0 * 32 + 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC (0 * 32 + 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR (0 * 32 + 5) /* Model-Specific Registers */
+#define X86_FEATURE_PAE (0 * 32 + 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE (0 * 32 + 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8 (0 * 32 + 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC (0 * 32 + 9) /* Onboard APIC */
+#define X86_FEATURE_SEP (0 * 32 + 11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR (0 * 32 + 12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE (0 * 32 + 13) /* Page Global Enable */
+#define X86_FEATURE_MCA (0 * 32 + 14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV (0 * 32 + 15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT (0 * 32 + 16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36 (0 * 32 + 17) /* 36-bit PSEs */
+#define X86_FEATURE_PN (0 * 32 + 18) /* Processor serial number */
+#define X86_FEATURE_CLFLUSH (0 * 32 + 19) /* CLFLUSH instruction */
+#define X86_FEATURE_DS (0 * 32 + 21) /* "dts" Debug Store */
+#define X86_FEATURE_ACPI (0 * 32 + 22) /* ACPI via MSR */
+#define X86_FEATURE_MMX (0 * 32 + 23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR (0 * 32 + 24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_XMM (0 * 32 + 25) /* "sse" */
+#define X86_FEATURE_XMM2 (0 * 32 + 26) /* "sse2" */
+#define X86_FEATURE_SELFSNOOP (0 * 32 + 27) /* "ss" CPU self snoop */
+#define X86_FEATURE_HT (0 * 32 + 28) /* Hyper-Threading */
+#define X86_FEATURE_ACC (0 * 32 + 29) /* "tm" Automatic clock control */
+#define X86_FEATURE_IA64 (0 * 32 + 30) /* IA-64 processor */
+#define X86_FEATURE_PBE (0 * 32 + 31) /* Pending Break Enable */
/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP (1*32+19) /* MP Capable */
-#define X86_FEATURE_NX (1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT (1*32+25) /* FXSAVE/FXRSTOR optimizations */
-#define X86_FEATURE_GBPAGES (1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64, 64-bit support) */
-#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow extensions */
-#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow */
+#define X86_FEATURE_SYSCALL (1 * 32 + 11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP (1 * 32 + 19) /* MP Capable */
+#define X86_FEATURE_NX (1 * 32 + 20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT (1 * 32 + 22) /* AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT (1 * 32 + 25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_GBPAGES (1 * 32 + 26) /* "pdpe1gb" GB pages */
+#define X86_FEATURE_RDTSCP (1 * 32 + 27) /* RDTSCP */
+#define X86_FEATURE_LM (1 * 32 + 29) /* Long Mode (x86-64, 64-bit support) */
+#define X86_FEATURE_3DNOWEXT (1 * 32 + 30) /* AMD 3DNow extensions */
+#define X86_FEATURE_3DNOW (1 * 32 + 31) /* 3DNow */
/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */
+#define X86_FEATURE_RECOVERY (2 * 32 + 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN (2 * 32 + 1) /* Longrun power control */
+#define X86_FEATURE_LRTI (2 * 32 + 3) /* LongRun table interface */
/* Other features, Linux-defined mapping, word 3 */
/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
+#define X86_FEATURE_CXMMX (3 * 32 + 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR (3 * 32 + 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR (3 * 32 + 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR (3 * 32 + 3) /* Centaur MCRs (= MTRRs) */
/* CPU types for specific tunings: */
-#define X86_FEATURE_K8 (3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7 (3*32+ 5) /* "" Athlon */
-#define X86_FEATURE_P3 (3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4 (3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP (3*32+ 9) /* SMP kernel running on UP */
-#define X86_FEATURE_ART (3*32+10) /* Always running timer (ART) */
-#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in IA32 userspace */
-#define X86_FEATURE_SYSENTER32 (3*32+15) /* "" sysenter in IA32 userspace */
-#define X86_FEATURE_REP_GOOD (3*32+16) /* REP microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* "" MFENCE synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" LFENCE synchronizes RDTSC */
-#define X86_FEATURE_ACC_POWER (3*32+19) /* AMD Accumulated Power Mechanism */
-#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS (3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY (3*32+22) /* CPU topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */
-#define X86_FEATURE_CPUID (3*32+25) /* CPU has CPUID instruction itself */
-#define X86_FEATURE_EXTD_APICID (3*32+26) /* Extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM (3*32+27) /* AMD multi-node processor */
-#define X86_FEATURE_APERFMPERF (3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
-#define X86_FEATURE_NONSTOP_TSC_S3 (3*32+30) /* TSC doesn't stop in S3 state */
-#define X86_FEATURE_TSC_KNOWN_FREQ (3*32+31) /* TSC has known frequency */
+#define X86_FEATURE_K8 (3 * 32 + 4) /* "" Opteron, Athlon64 */
+#define X86_FEATURE_K7 (3 * 32 + 5) /* "" Athlon */
+#define X86_FEATURE_P3 (3 * 32 + 6) /* "" P3 */
+#define X86_FEATURE_P4 (3 * 32 + 7) /* "" P4 */
+#define X86_FEATURE_CONSTANT_TSC (3 * 32 + 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_UP (3 * 32 + 9) /* SMP kernel running on UP */
+#define X86_FEATURE_ART (3 * 32 + 10) /* Always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON (3 * 32 + 11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS (3 * 32 + 12) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS (3 * 32 + 13) /* Branch Trace Store */
+#define X86_FEATURE_SYSCALL32 (3 * 32 + 14) /* "" syscall in IA32 userspace */
+#define X86_FEATURE_SYSENTER32 (3 * 32 + 15) /* "" sysenter in IA32 userspace */
+#define X86_FEATURE_REP_GOOD (3 * 32 + 16) /* REP microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC (3 * 32 + 17) /* "" MFENCE synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC (3 * 32 + 18) /* "" LFENCE synchronizes RDTSC */
+#define X86_FEATURE_ACC_POWER (3 * 32 + 19) /* AMD Accumulated Power Mechanism */
+#define X86_FEATURE_NOPL (3 * 32 + 20) /* The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS (3 * 32 + 21) /* "" Always-present feature */
+#define X86_FEATURE_XTOPOLOGY (3 * 32 + 22) /* CPU topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE (3 * 32 + 23) /* TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC (3 * 32 + 24) /* TSC does not stop in C states */
+#define X86_FEATURE_CPUID (3 * 32 + 25) /* CPU has CPUID instruction itself */
+#define X86_FEATURE_EXTD_APICID (3 * 32 + 26) /* Extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM (3 * 32 + 27) /* AMD multi-node processor */
+#define X86_FEATURE_APERFMPERF (3 * 32 + 28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+#define X86_FEATURE_NONSTOP_TSC_S3 (3 * 32 + 30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_TSC_KNOWN_FREQ (3 * 32 + 31) /* TSC has known frequency */
/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
-#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ (4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64 (4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT (4*32+ 3) /* "monitor" MONITOR/MWAIT support */
-#define X86_FEATURE_DSCPL (4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
-#define X86_FEATURE_VMX (4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX (4*32+ 6) /* Safer Mode eXtensions */
-#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID (4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG (4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA (4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B instruction */
-#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM (4*32+15) /* Perf/Debug Capabilities MSR */
-#define X86_FEATURE_PCID (4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */
-#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */
-#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC (4*32+21) /* X2APIC */
-#define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER (4*32+24) /* TSC deadline timer */
-#define X86_FEATURE_AES (4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
-#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE instruction enabled in the OS */
-#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C (4*32+29) /* 16-bit FP conversions */
-#define X86_FEATURE_RDRAND (4*32+30) /* RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */
+#define X86_FEATURE_XMM3 (4 * 32 + 0) /* "pni" SSE-3 */
+#define X86_FEATURE_PCLMULQDQ (4 * 32 + 1) /* PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64 (4 * 32 + 2) /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT (4 * 32 + 3) /* "monitor" MONITOR/MWAIT support */
+#define X86_FEATURE_DSCPL (4 * 32 + 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
+#define X86_FEATURE_VMX (4 * 32 + 5) /* Hardware virtualization */
+#define X86_FEATURE_SMX (4 * 32 + 6) /* Safer Mode eXtensions */
+#define X86_FEATURE_EST (4 * 32 + 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2 (4 * 32 + 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 (4 * 32 + 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_CID (4 * 32 + 10) /* Context ID */
+#define X86_FEATURE_SDBG (4 * 32 + 11) /* Silicon Debug */
+#define X86_FEATURE_FMA (4 * 32 + 12) /* Fused multiply-add */
+#define X86_FEATURE_CX16 (4 * 32 + 13) /* CMPXCHG16B instruction */
+#define X86_FEATURE_XTPR (4 * 32 + 14) /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM (4 * 32 + 15) /* Perf/Debug Capabilities MSR */
+#define X86_FEATURE_PCID (4 * 32 + 17) /* Process Context Identifiers */
+#define X86_FEATURE_DCA (4 * 32 + 18) /* Direct Cache Access */
+#define X86_FEATURE_XMM4_1 (4 * 32 + 19) /* "sse4_1" SSE-4.1 */
+#define X86_FEATURE_XMM4_2 (4 * 32 + 20) /* "sse4_2" SSE-4.2 */
+#define X86_FEATURE_X2APIC (4 * 32 + 21) /* X2APIC */
+#define X86_FEATURE_MOVBE (4 * 32 + 22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT (4 * 32 + 23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER (4 * 32 + 24) /* TSC deadline timer */
+#define X86_FEATURE_AES (4 * 32 + 25) /* AES instructions */
+#define X86_FEATURE_XSAVE (4 * 32 + 26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
+#define X86_FEATURE_OSXSAVE (4 * 32 + 27) /* "" XSAVE instruction enabled in the OS */
+#define X86_FEATURE_AVX (4 * 32 + 28) /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C (4 * 32 + 29) /* 16-bit FP conversions */
+#define X86_FEATURE_RDRAND (4 * 32 + 30) /* RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR (4 * 32 + 31) /* Running on a hypervisor */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */
-#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* "rng_en" RNG enabled */
-#define X86_FEATURE_XCRYPT (5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
-#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE (5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN (5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM (5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN (5*32+13) /* PMM enabled */
+#define X86_FEATURE_XSTORE (5 * 32 + 2) /* "rng" RNG present (xstore) */
+#define X86_FEATURE_XSTORE_EN (5 * 32 + 3) /* "rng_en" RNG enabled */
+#define X86_FEATURE_XCRYPT (5 * 32 + 6) /* "ace" on-CPU crypto (xcrypt) */
+#define X86_FEATURE_XCRYPT_EN (5 * 32 + 7) /* "ace_en" on-CPU crypto enabled */
+#define X86_FEATURE_ACE2 (5 * 32 + 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN (5 * 32 + 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE (5 * 32 + 10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN (5 * 32 + 11) /* PHE enabled */
+#define X86_FEATURE_PMM (5 * 32 + 12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN (5 * 32 + 13) /* PMM enabled */
/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
-#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM (6*32+ 2) /* Secure Virtual Machine */
-#define X86_FEATURE_EXTAPIC (6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY (6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM (6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A (6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE (6*32+17) /* Translation Cache Extension */
-#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM (6*32+21) /* Trailing Bit Manipulations */
-#define X86_FEATURE_TOPOEXT (6*32+22) /* Topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* Core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT (6*32+26) /* Data breakpoint extension */
-#define X86_FEATURE_PTSC (6*32+27) /* Performance time-stamp counter */
-#define X86_FEATURE_PERFCTR_LLC (6*32+28) /* Last Level Cache performance counter extensions */
-#define X86_FEATURE_MWAITX (6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
+#define X86_FEATURE_LAHF_LM (6 * 32 + 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY (6 * 32 + 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVM (6 * 32 + 2) /* Secure Virtual Machine */
+#define X86_FEATURE_EXTAPIC (6 * 32 + 3) /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY (6 * 32 + 4) /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM (6 * 32 + 5) /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A (6 * 32 + 6) /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE (6 * 32 + 7) /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH (6 * 32 + 8) /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW (6 * 32 + 9) /* OS Visible Workaround */
+#define X86_FEATURE_IBS (6 * 32 + 10) /* Instruction Based Sampling */
+#define X86_FEATURE_XOP (6 * 32 + 11) /* extended AVX instructions */
+#define X86_FEATURE_SKINIT (6 * 32 + 12) /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT (6 * 32 + 13) /* Watchdog timer */
+#define X86_FEATURE_LWP (6 * 32 + 15) /* Light Weight Profiling */
+#define X86_FEATURE_FMA4 (6 * 32 + 16) /* 4 operands MAC instructions */
+#define X86_FEATURE_TCE (6 * 32 + 17) /* Translation Cache Extension */
+#define X86_FEATURE_NODEID_MSR (6 * 32 + 19) /* NodeId MSR */
+#define X86_FEATURE_TBM (6 * 32 + 21) /* Trailing Bit Manipulations */
+#define X86_FEATURE_TOPOEXT (6 * 32 + 22) /* Topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE (6 * 32 + 23) /* Core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB (6 * 32 + 24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT (6 * 32 + 26) /* Data breakpoint extension */
+#define X86_FEATURE_PTSC (6 * 32 + 27) /* Performance time-stamp counter */
+#define X86_FEATURE_PERFCTR_LLC (6 * 32 + 28) /* Last Level Cache performance counter extensions */
+#define X86_FEATURE_MWAITX (6 * 32 + 29) /* MWAIT extension (MONITORX/MWAITX instructions) */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
-#define X86_FEATURE_FSGSBASE (9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
-#define X86_FEATURE_TSC_ADJUST (9*32+ 1) /* TSC adjustment MSR 0x3B */
-#define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE (9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
-#define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM (9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_MPX (9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_RDT_A (9*32+15) /* Resource Director Technology Allocation */
-#define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_AVX512DQ (9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
-#define X86_FEATURE_RDSEED (9*32+18) /* RDSEED instruction */
-#define X86_FEATURE_ADX (9*32+19) /* ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_AVX512IFMA (9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
-#define X86_FEATURE_CLFLUSHOPT (9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB (9*32+24) /* CLWB instruction */
-#define X86_FEATURE_INTEL_PT (9*32+25) /* Intel Processor Trace */
-#define X86_FEATURE_AVX512PF (9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER (9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD (9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI (9*32+29) /* SHA1/SHA256 Instruction Extensions */
-#define X86_FEATURE_AVX512BW (9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
-#define X86_FEATURE_AVX512VL (9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+#define X86_FEATURE_FSGSBASE (9 * 32 + 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
+#define X86_FEATURE_TSC_ADJUST (9 * 32 + 1) /* TSC adjustment MSR 0x3B */
+#define X86_FEATURE_BMI1 (9 * 32 + 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE (9 * 32 + 4) /* Hardware Lock Elision */
+#define X86_FEATURE_AVX2 (9 * 32 + 5) /* AVX2 instructions */
+#define X86_FEATURE_SMEP (9 * 32 + 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 (9 * 32 + 8) /* 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS (9 * 32 + 9) /* Enhanced REP MOVSB/STOSB instructions */
+#define X86_FEATURE_INVPCID (9 * 32 + 10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM (9 * 32 + 11) /* Restricted Transactional Memory */
+#define X86_FEATURE_CQM (9 * 32 + 12) /* Cache QoS Monitoring */
+#define X86_FEATURE_MPX (9 * 32 + 14) /* Memory Protection Extension */
+#define X86_FEATURE_RDT_A (9 * 32 + 15) /* Resource Director Technology Allocation */
+#define X86_FEATURE_AVX512F (9 * 32 + 16) /* AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ (9 * 32 + 17) /* AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED (9 * 32 + 18) /* RDSEED instruction */
+#define X86_FEATURE_ADX (9 * 32 + 19) /* ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP (9 * 32 + 20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_AVX512IFMA (9 * 32 + 21) /* AVX-512 Integer Fused Multiply-Add instructions */
+#define X86_FEATURE_CLFLUSHOPT (9 * 32 + 23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB (9 * 32 + 24) /* CLWB instruction */
+#define X86_FEATURE_INTEL_PT (9 * 32 + 25) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512PF (9 * 32 + 26) /* AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER (9 * 32 + 27) /* AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD (9 * 32 + 28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI (9 * 32 + 29) /* SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW (9 * 32 + 30) /* AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL (9 * 32 + 31) /* AVX-512 VL (128/256 Vector Length) Extensions */
/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
-#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */
-#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
-#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
-#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
+#define X86_FEATURE_XSAVEOPT (10 * 32 + 0) /* XSAVEOPT instruction */
+#define X86_FEATURE_XSAVEC (10 * 32 + 1) /* XSAVEC instruction */
+#define X86_FEATURE_XGETBV1 (10 * 32 + 2) /* XGETBV with ECX = 1 instruction */
+#define X86_FEATURE_XSAVES (10 * 32 + 3) /* XSAVES/XRSTORS instructions */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 11 */
-#define X86_FEATURE_PREFETCHWT1 (11*32+ 0) /* PREFETCHWT1 Intel® Xeon PhiTM only */
-#define X86_FEATURE_AVX512VBMI (11*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
-#define X86_FEATURE_UMIP (11*32+ 2) /* User Mode Instruction Protection */
-#define X86_FEATURE_PKU (11*32+ 3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE (11*32+ 4) /* OS Protection Keys Enable */
-#define X86_FEATURE_AVX512_VBMI2 (11*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
-#define X86_FEATURE_GFNI (11*32+ 8) /* Galois Field New Instructions */
-#define X86_FEATURE_VAES (11*32+ 9) /* Vector AES */
-#define X86_FEATURE_VPCLMULQDQ (11*32+10) /* Carry-Less Multiplication Double Quadword */
-#define X86_FEATURE_AVX512_VNNI (11*32+11) /* Vector Neural Network Instructions */
-#define X86_FEATURE_AVX512_BITALG (11*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
-#define X86_FEATURE_TME (11*32+13) /* Intel Total Memory Encryption */
-#define X86_FEATURE_AVX512_VPOPCNTDQ (11*32+14) /* POPCNT for vectors of DW/QW */
-#define X86_FEATURE_LA57 (11*32+16) /* 5-level page tables */
-#define X86_FEATURE_RDPID (11*32+22) /* RDPID instruction */
-#define X86_FEATURE_CLDEMOTE (11*32+25) /* CLDEMOTE instruction */
+#define X86_FEATURE_PREFETCHWT1 (11 * 32 + 0) /* PREFETCHWT1 Intel® Xeon PhiTM only */
+#define X86_FEATURE_AVX512VBMI (11 * 32 + 1) /* AVX512 Vector Bit Manipulation instructions*/
+#define X86_FEATURE_UMIP (11 * 32 + 2) /* User Mode Instruction Protection */
+#define X86_FEATURE_PKU (11 * 32 + 3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (11 * 32 + 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_AVX512_VBMI2 (11 * 32 + 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_SHSTK (11 * 32 + 7) /* Shadow Stack */
+#define X86_FEATURE_GFNI (11 * 32 + 8) /* Galois Field New Instructions */
+#define X86_FEATURE_VAES (11 * 32 + 9) /* Vector AES */
+#define X86_FEATURE_VPCLMULQDQ (11 * 32 + 10) /* Carry-Less Multiplication Double Quadword */
+#define X86_FEATURE_AVX512_VNNI (11 * 32 + 11) /* Vector Neural Network Instructions */
+#define X86_FEATURE_AVX512_BITALG (11 * 32 + 12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_TME (11 * 32 + 13) /* Intel Total Memory Encryption */
+#define X86_FEATURE_AVX512_VPOPCNTDQ (11 * 32 + 14) /* POPCNT for vectors of DW/QW */
+#define X86_FEATURE_LA57 (11 * 32 + 16) /* 5-level page tables */
+#define X86_FEATURE_RDPID (11 * 32 + 22) /* RDPID instruction */
+#define X86_FEATURE_CLDEMOTE (11 * 32 + 25) /* CLDEMOTE instruction */
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */
-#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
-#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
+#define X86_FEATURE_CQM_OCCUP_LLC (12 * 32 + 0) /* LLC occupancy monitoring */
+#define X86_FEATURE_CQM_MBM_TOTAL (12 * 32 + 1) /* LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (12 * 32 + 2) /* LLC Local MBM monitoring */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
-#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
-#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
-#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
-#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_CLZERO (13 * 32 + 0) /* CLZERO instruction */
+#define X86_FEATURE_IRPERF (13 * 32 + 1) /* Instructions Retired Count */
+#define X86_FEATURE_XSAVEERPTR (13 * 32 + 2) /* Always save/restore FP error pointers */
+#define X86_FEATURE_IBPB (13 * 32 + 12) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_IBRS (13 * 32 + 14) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_STIBP (13 * 32 + 15) /* Single Thread Indirect Branch Predictors */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
-#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
-#define X86_FEATURE_HDC (14*32+13) /* HDC base registers present */
+#define X86_FEATURE_DTHERM (14 * 32 + 0) /* Digital Thermal Sensor */
+#define X86_FEATURE_IDA (14 * 32 + 1) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (14 * 32 + 2) /* Always Running APIC Timer */
+#define X86_FEATURE_PLN (14 * 32 + 4) /* Intel Power Limit Notification */
+#define X86_FEATURE_PTS (14 * 32 + 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_HWP (14 * 32 + 7) /* Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY (14 * 32 + 8) /* HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14 * 32 + 9) /* HWP Activity Window */
+#define X86_FEATURE_HWP_EPP (14 * 32 + 10) /* HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14 * 32 + 11) /* HWP Package Level Request */
+#define X86_FEATURE_HDC (14 * 32 + 13) /* HDC base registers present */
/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
-#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
-#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
-#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
-#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
-#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
-#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
-#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
+#define X86_FEATURE_NPT (15 * 32 + 0) /* Nested Page Table support */
+#define X86_FEATURE_LBRV (15 * 32 + 1) /* LBR Virtualization support */
+#define X86_FEATURE_SVML (15 * 32 + 2) /* "svm_lock" SVM locking MSR */
+#define X86_FEATURE_NRIPS (15 * 32 + 3) /* "nrip_save" SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR (15 * 32 + 4) /* "tsc_scale" TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN (15 * 32 + 5) /* "vmcb_clean" VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID (15 * 32 + 6) /* flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15 * 32 + 7) /* Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15 * 32 + 10) /* filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15 * 32 + 12) /* pause filter threshold */
+#define X86_FEATURE_AVIC (15 * 32 + 13) /* Virtual Interrupt Controller */
+#define X86_FEATURE_V_VMSAVE_VMLOAD (15 * 32 + 15) /* Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF (15 * 32 + 16) /* Virtual GIF */
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 16 */
-#define X86_FEATURE_CQM_LLC (16*32+ 1) /* LLC QoS if 1 */
+#define X86_FEATURE_CQM_LLC (16 * 32 + 1) /* LLC QoS if 1 */
/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
-#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
-#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
-#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
+#define X86_FEATURE_OVERFLOW_RECOV (17 * 32 + 0) /* MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR (17 * 32 + 1) /* Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA (17 * 32 + 3) /* Scalable MCA */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
-#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
-#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
-#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
-#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
-#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
+#define X86_FEATURE_AVX512_4VNNIW (18 * 32 + 2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (18 * 32 + 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_PCONFIG (18 * 32 + 18) /* Intel PCONFIG */
+#define X86_FEATURE_SPEC_CTRL (18 * 32 + 26) /* "" Speculation Control (IBRS + IBPB) */
+#define X86_FEATURE_INTEL_STIBP (18 * 32 + 27) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ARCH_CAPABILITIES (18 * 32 + 29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_SPEC_CTRL_SSBD (18 * 32 + 31) /* "" Speculative Store Bypass Disable */
enum {
- X86_VENDOR_INTEL = 0,
- X86_VENDOR_AMD = 1,
+ X86_VENDOR_INTEL = 0,
+ X86_VENDOR_AMD = 1,
X86_VENDOR_MAX
};
struct cpuinfo_x86 {
/* cpu context */
- uint8_t x86_family;
- uint8_t x86_vendor;
- uint8_t x86_model;
- uint8_t x86_mask;
- uint32_t x86_capability[NCAPINTS];
- uint32_t x86_power;
- uint32_t extended_cpuid_level;
- int cpuid_level;
- char x86_vendor_id[16];
- char x86_model_id[64];
+ uint8_t x86_family;
+ uint8_t x86_vendor;
+ uint8_t x86_model;
+ uint8_t x86_mask;
+ uint32_t x86_capability[NCAPINTS];
+ uint32_t x86_power;
+ uint32_t extended_cpuid_level;
+ int cpuid_level;
+ char x86_vendor_id[16];
+ char x86_model_id[64];
/* fpu context */
- uint64_t xfeatures_mask;
- uint32_t xsave_size_max;
- uint32_t xsave_size;
- uint32_t xstate_offsets[XFEATURE_MAX];
- uint32_t xstate_sizes[XFEATURE_MAX];
+ uint64_t xfeatures_mask;
+ uint32_t xsave_size_max;
+ uint32_t xsave_size;
+ uint32_t xstate_offsets[XFEATURE_MAX];
+ uint32_t xstate_sizes[XFEATURE_MAX];
- uint32_t xsaves_size;
- uint32_t xstate_comp_offsets[XFEATURE_MAX];
- uint32_t xstate_comp_sizes[XFEATURE_MAX];
+ uint32_t xsaves_size;
+ uint32_t xstate_comp_offsets[XFEATURE_MAX];
+ uint32_t xstate_comp_sizes[XFEATURE_MAX];
};
typedef struct cpuinfo_x86 compel_cpuinfo_t;
diff --git a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h
index 509f4488b..d595a68fc 100644
--- a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h
+++ b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h
@@ -7,27 +7,48 @@
#include
-#define FP_MIN_ALIGN_BYTES 64
-#define FXSAVE_ALIGN_BYTES 16
+#define FP_MIN_ALIGN_BYTES 64
+#define FXSAVE_ALIGN_BYTES 16
-#define FP_XSTATE_MAGIC1 0x46505853U
-#define FP_XSTATE_MAGIC2 0x46505845U
+#define FP_XSTATE_MAGIC1 0x46505853U
+#define FP_XSTATE_MAGIC2 0x46505845U
#ifndef FP_XSTATE_MAGIC2_SIZE
-#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2)
+#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2)
#endif
-#define XSTATE_FP 0x1
-#define XSTATE_SSE 0x2
-#define XSTATE_YMM 0x4
+#define XSTATE_FP 0x1
+#define XSTATE_SSE 0x2
+#define XSTATE_YMM 0x4
-#define FXSAVE_SIZE 512
-#define XSAVE_SIZE 4096
+#define FXSAVE_SIZE 512
+/*
+ * This used to be 4096 (one page). There is a comment below concerning
+ * this size:
+ * "One page should be enough for the whole xsave state ;-)"
+ * Which is kind of funny as it is no longer enough ;-)
+ *
+ * Older CPUs:
+ * # cpuid -1 -l 0xd -s 0
+ * ...
+ * bytes required by XSAVE/XRSTOR area = 0x00000988 (2440)
+ *
+ * Newer CPUs (Sapphire Rapids):
+ * # cpuid -1 -l 0xd -s 0
+ * ...
+ * bytes required by XSAVE/XRSTOR area = 0x00002b00 (11008)
+ *
+ * So one page is no longer enough... But:
+ *
+ * Four pages should be enough for the whole xsave state ;-)
+ */
-#define XSAVE_HDR_SIZE 64
-#define XSAVE_HDR_OFFSET FXSAVE_SIZE
+/* Parenthesized so the value survives any surrounding operator (e.g. x / XSAVE_SIZE). */
+#define XSAVE_SIZE (4 * 4096)
-#define XSAVE_YMM_SIZE 256
-#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
+#define XSAVE_HDR_SIZE 64
+#define XSAVE_HDR_OFFSET FXSAVE_SIZE
+
+#define XSAVE_YMM_SIZE 256
+#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
/*
* List of XSAVE features Linux knows about:
@@ -52,91 +73,93 @@ enum xfeature {
XFEATURE_MAX,
};
-#define XSTATE_CPUID 0x0000000d
+#define XSTATE_CPUID 0x0000000d
-#define XFEATURE_MASK_FP (1 << XFEATURE_FP)
-#define XFEATURE_MASK_SSE (1 << XFEATURE_SSE)
-#define XFEATURE_MASK_YMM (1 << XFEATURE_YMM)
-#define XFEATURE_MASK_BNDREGS (1 << XFEATURE_BNDREGS)
-#define XFEATURE_MASK_BNDCSR (1 << XFEATURE_BNDCSR)
-#define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK)
-#define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256)
-#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
-#define XFEATURE_MASK_PT (1 << XFEATURE_PT)
-#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
-#define XFEATURE_MASK_HDC (1 << XFEATURE_HDC)
-#define XFEATURE_MASK_MAX (1 << XFEATURE_MAX)
+#define XFEATURE_MASK_FP (1 << XFEATURE_FP)
+#define XFEATURE_MASK_SSE (1 << XFEATURE_SSE)
+#define XFEATURE_MASK_YMM (1 << XFEATURE_YMM)
+#define XFEATURE_MASK_BNDREGS (1 << XFEATURE_BNDREGS)
+#define XFEATURE_MASK_BNDCSR (1 << XFEATURE_BNDCSR)
+#define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK)
+#define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256)
+#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
+#define XFEATURE_MASK_PT (1 << XFEATURE_PT)
+#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
+#define XFEATURE_MASK_HDC (1 << XFEATURE_HDC)
+#define XFEATURE_MASK_MAX (1 << XFEATURE_MAX)
-#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
-#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM)
+#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
+#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM)
-#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM
+#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM
/* Supervisor features */
-#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT | XFEATURE_HDC)
+/* XFEATURE_HDC is a bit index (XFEATURE_MASK_HDC is 1 << XFEATURE_HDC); combine masks, not the index. */
+#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT | XFEATURE_MASK_HDC)
/* All currently supported features */
-#define XCNTXT_MASK \
- (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | \
- XFEATURE_MASK_YMM | XFEATURE_MASK_OPMASK | \
- XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM | \
- XFEATURE_MASK_PKRU | XFEATURE_MASK_BNDREGS | \
- XFEATURE_MASK_BNDCSR)
+#define XFEATURE_MASK_USER \
+ (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | \
+ XFEATURE_MASK_Hi16_ZMM | XFEATURE_MASK_PKRU | XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)
+
+/* xsave structure features which are safe to fill with garbage (see validate_random_xstate()) */
+#define XFEATURE_MASK_FAULTINJ \
+ (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | \
+ XFEATURE_MASK_Hi16_ZMM)
struct fpx_sw_bytes {
- uint32_t magic1;
- uint32_t extended_size;
- uint64_t xstate_bv;
- uint32_t xstate_size;
- uint32_t padding[7];
+ uint32_t magic1;
+ uint32_t extended_size;
+ uint64_t xstate_bv;
+ uint32_t xstate_size;
+ uint32_t padding[7];
};
struct i387_fxsave_struct {
- uint16_t cwd; /* Control Word */
- uint16_t swd; /* Status Word */
- uint16_t twd; /* Tag Word */
- uint16_t fop; /* Last Instruction Opcode */
+ uint16_t cwd; /* Control Word */
+ uint16_t swd; /* Status Word */
+ uint16_t twd; /* Tag Word */
+ uint16_t fop; /* Last Instruction Opcode */
union {
struct {
- uint64_t rip; /* Instruction Pointer */
- uint64_t rdp; /* Data Pointer */
+ uint64_t rip; /* Instruction Pointer */
+ uint64_t rdp; /* Data Pointer */
};
struct {
- uint32_t fip; /* FPU IP Offset */
- uint32_t fcs; /* FPU IP Selector */
- uint32_t foo; /* FPU Operand Offset */
- uint32_t fos; /* FPU Operand Selector */
+ uint32_t fip; /* FPU IP Offset */
+ uint32_t fcs; /* FPU IP Selector */
+ uint32_t foo; /* FPU Operand Offset */
+ uint32_t fos; /* FPU Operand Selector */
};
};
- uint32_t mxcsr; /* MXCSR Register State */
- uint32_t mxcsr_mask; /* MXCSR Mask */
+ uint32_t mxcsr; /* MXCSR Register State */
+ uint32_t mxcsr_mask; /* MXCSR Mask */
/* 8*16 bytes for each FP-reg = 128 bytes */
- uint32_t st_space[32];
+ uint32_t st_space[32];
/* 16*16 bytes for each XMM-reg = 256 bytes */
- uint32_t xmm_space[64];
+ uint32_t xmm_space[64];
- uint32_t padding[12];
+ uint32_t padding[12];
union {
- uint32_t padding1[12];
- uint32_t sw_reserved[12];
+ uint32_t padding1[12];
+ uint32_t sw_reserved[12];
};
} __aligned(FXSAVE_ALIGN_BYTES);
struct xsave_hdr_struct {
- uint64_t xstate_bv;
- uint64_t xcomp_bv;
- uint64_t reserved[6];
+ uint64_t xstate_bv;
+ uint64_t xcomp_bv;
+ uint64_t reserved[6];
} __packed;
/*
* xstate_header.xcomp_bv[63] indicates that the extended_state_area
* is in compacted format.
*/
-#define XCOMP_BV_COMPACTED_FORMAT ((uint64_t)1 << 63)
+#define XCOMP_BV_COMPACTED_FORMAT ((uint64_t)1 << 63)
/*
* State component 2:
@@ -149,21 +172,21 @@ struct xsave_hdr_struct {
* The high 128 bits are stored here.
*/
struct ymmh_struct {
- uint32_t ymmh_space[64];
+ uint32_t ymmh_space[64];
} __packed;
/* Intel MPX support: */
struct mpx_bndreg {
- uint64_t lower_bound;
- uint64_t upper_bound;
+ uint64_t lower_bound;
+ uint64_t upper_bound;
} __packed;
/*
* State component 3 is used for the 4 128-bit bounds registers
*/
struct mpx_bndreg_state {
- struct mpx_bndreg bndreg[4];
+ struct mpx_bndreg bndreg[4];
} __packed;
/*
@@ -172,8 +195,8 @@ struct mpx_bndreg_state {
* register BNDSTATUS. We call the pair "BNDCSR".
*/
struct mpx_bndcsr {
- uint64_t bndcfgu;
- uint64_t bndstatus;
+ uint64_t bndcfgu;
+ uint64_t bndstatus;
} __packed;
/*
@@ -181,8 +204,8 @@ struct mpx_bndcsr {
*/
struct mpx_bndcsr_state {
union {
- struct mpx_bndcsr bndcsr;
- uint8_t pad_to_64_bytes[64];
+ struct mpx_bndcsr bndcsr;
+ uint8_t pad_to_64_bytes[64];
};
} __packed;
@@ -193,7 +216,7 @@ struct mpx_bndcsr_state {
* k0-k7 (opmask state).
*/
struct avx_512_opmask_state {
- uint64_t opmask_reg[8];
+ uint64_t opmask_reg[8];
} __packed;
/*
@@ -202,7 +225,7 @@ struct avx_512_opmask_state {
* ZMM0_H-ZMM15_H (ZMM_Hi256 state).
*/
struct avx_512_zmm_uppers_state {
- uint64_t zmm_upper[16 * 4];
+ uint64_t zmm_upper[16 * 4];
} __packed;
/*
@@ -210,7 +233,7 @@ struct avx_512_zmm_uppers_state {
* ZMM16-ZMM31 (Hi16_ZMM state).
*/
struct avx_512_hi16_state {
- uint64_t hi16_zmm[16 * 8];
+ uint64_t hi16_zmm[16 * 8];
} __packed;
/*
@@ -218,10 +241,18 @@ struct avx_512_hi16_state {
* 8 bytes long but only 4 bytes is used currently.
*/
struct pkru_state {
- uint32_t pkru;
- uint32_t pad;
+ uint32_t pkru;
+ uint32_t pad;
} __packed;
+/*
+ * State component 11 is Control-flow Enforcement user states
+ */
+struct cet_user_state {
+ uint64_t cet; /* user control-flow settings */
+ uint64_t ssp; /* user shadow stack pointer */
+};
+
/*
* This is our most modern FPU state format, as saved by the XSAVE
* and restored by the XRSTOR instructions.
@@ -232,74 +263,78 @@ struct pkru_state {
* can vary quite a bit between CPUs.
*
*
- * One page should be enough for the whole xsave state.
+ * One page should be enough for the whole xsave state ;-)
+ *
+ * Of course it was not ;-) Now using four pages...
+ *
*/
-#define EXTENDED_STATE_AREA_SIZE (4096 - sizeof(struct i387_fxsave_struct) - sizeof(struct xsave_hdr_struct))
+#define EXTENDED_STATE_AREA_SIZE (XSAVE_SIZE - sizeof(struct i387_fxsave_struct) - sizeof(struct xsave_hdr_struct) - sizeof(struct cet_user_state))
/*
* cpu requires it to be 64 byte aligned
*/
struct xsave_struct {
- struct i387_fxsave_struct i387;
- struct xsave_hdr_struct xsave_hdr;
+ struct i387_fxsave_struct i387;
+ struct xsave_hdr_struct xsave_hdr;
union {
/*
* This ymmh is unndeed, for
* backward compatibility.
*/
- struct ymmh_struct ymmh;
- uint8_t extended_state_area[EXTENDED_STATE_AREA_SIZE];
+ struct ymmh_struct ymmh;
+ uint8_t extended_state_area[EXTENDED_STATE_AREA_SIZE];
};
+ /* CET user state sits after the extended area; EXTENDED_STATE_AREA_SIZE subtracts its size to leave room for it. */
+ struct cet_user_state cet;
} __aligned(FP_MIN_ALIGN_BYTES) __packed;
struct xsave_struct_ia32 {
- struct i387_fxsave_struct i387;
- struct xsave_hdr_struct xsave_hdr;
+ struct i387_fxsave_struct i387;
+ struct xsave_hdr_struct xsave_hdr;
union {
/*
* This ymmh is unndeed, for
* backward compatibility.
*/
- struct ymmh_struct ymmh;
- uint8_t extended_state_area[EXTENDED_STATE_AREA_SIZE];
+ struct ymmh_struct ymmh;
+ uint8_t extended_state_area[EXTENDED_STATE_AREA_SIZE];
};
-} __aligned(FXSAVE_ALIGN_BYTES);
+};
typedef struct {
/*
- * The FPU xsave area must be continious and FP_MIN_ALIGN_BYTES
+ * The FPU xsave area must be continuous and FP_MIN_ALIGN_BYTES
* aligned, thus make sure the compiler won't insert any hole here.
*/
union {
- struct xsave_struct xsave;
- uint8_t __pad[sizeof(struct xsave_struct) + FP_XSTATE_MAGIC2_SIZE];
+ struct xsave_struct xsave;
+ uint8_t __pad[sizeof(struct xsave_struct) + FP_XSTATE_MAGIC2_SIZE];
};
uint8_t has_fpu;
} fpu_state_64_t;
struct user_i387_ia32_struct {
- uint32_t cwd; /* FPU Control Word */
- uint32_t swd; /* FPU Status Word */
- uint32_t twd; /* FPU Tag Word */
- uint32_t fip; /* FPU IP Offset */
- uint32_t fcs; /* FPU IP Selector */
- uint32_t foo; /* FPU Operand Pointer Offset */
- uint32_t fos; /* FPU Operand Pointer Selector */
- uint32_t st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
+ uint32_t cwd; /* FPU Control Word */
+ uint32_t swd; /* FPU Status Word */
+ uint32_t twd; /* FPU Tag Word */
+ uint32_t fip; /* FPU IP Offset */
+ uint32_t fcs; /* FPU IP Selector */
+ uint32_t foo; /* FPU Operand Pointer Offset */
+ uint32_t fos; /* FPU Operand Pointer Selector */
+ uint32_t st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
};
typedef struct {
struct {
- struct user_i387_ia32_struct i387_ia32;
+ struct user_i387_ia32_struct i387_ia32;
/* Software status information [not touched by FSAVE]: */
- uint32_t status;
+ uint32_t status;
} fregs_state;
union {
- struct xsave_struct_ia32 xsave;
- uint8_t __pad[sizeof(struct xsave_struct) + FP_XSTATE_MAGIC2_SIZE];
+ struct xsave_struct_ia32 xsave;
+ uint8_t __pad[sizeof(struct xsave_struct) + FP_XSTATE_MAGIC2_SIZE];
} __aligned(FXSAVE_ALIGN_BYTES);
} __aligned(FXSAVE_ALIGN_BYTES) fpu_state_ia32_t;
@@ -308,14 +343,17 @@ typedef struct {
*/
typedef struct {
union {
- fpu_state_64_t fpu_state_64;
- fpu_state_ia32_t fpu_state_ia32;
+ fpu_state_64_t fpu_state_64;
+ struct {
+ /* fpu_state_ia32->xsave has to be 64-byte aligned. */
+ uint32_t __pad[2];
+ fpu_state_ia32_t fpu_state_ia32;
+ };
};
uint8_t has_fpu;
} fpu_state_t;
-extern void compel_convert_from_fxsr(struct user_i387_ia32_struct *env,
- struct i387_fxsave_struct *fxsave);
+extern void compel_convert_from_fxsr(struct user_i387_ia32_struct *env, struct i387_fxsave_struct *fxsave);
#endif /* __CR_ASM_FPU_H__ */
diff --git a/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h b/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h
index e6d394989..b998c488c 100644
--- a/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h
+++ b/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h
@@ -6,57 +6,80 @@
#include
#include
-#define SIGMAX 64
-#define SIGMAX_OLD 31
+#define SIGMAX 64
+#define SIGMAX_OLD 31
+
+#define ARCH_HAS_PTRACE_GET_THREAD_AREA
+
+/*
+ * Linux preserves three TLS segments in GDT.
+ * Offsets in GDT differ between 32-bit and 64-bit machines.
+ * For 64-bit x86 those GDT offsets are the same
+ * for native and compat tasks.
+ */
+#define GDT_ENTRY_TLS_MIN 12
+#define GDT_ENTRY_TLS_MAX 14
+#define GDT_ENTRY_TLS_NUM 3
+typedef struct {
+ user_desc_t desc[GDT_ENTRY_TLS_NUM];
+} tls_t;
+
+struct thread_ctx;
+struct parasite_ctl;
+struct parasite_thread_ctl;
+extern int __compel_arch_fetch_thread_area(int tid, struct thread_ctx *th);
+extern int compel_arch_fetch_thread_area(struct parasite_thread_ctl *tctl);
+extern void compel_arch_get_tls_thread(struct parasite_thread_ctl *tctl, tls_t *out);
+extern void compel_arch_get_tls_task(struct parasite_ctl *ctl, tls_t *out);
typedef struct {
- uint64_t r15;
- uint64_t r14;
- uint64_t r13;
- uint64_t r12;
- uint64_t bp;
- uint64_t bx;
- uint64_t r11;
- uint64_t r10;
- uint64_t r9;
- uint64_t r8;
- uint64_t ax;
- uint64_t cx;
- uint64_t dx;
- uint64_t si;
- uint64_t di;
- uint64_t orig_ax;
- uint64_t ip;
- uint64_t cs;
- uint64_t flags;
- uint64_t sp;
- uint64_t ss;
- uint64_t fs_base;
- uint64_t gs_base;
- uint64_t ds;
- uint64_t es;
- uint64_t fs;
- uint64_t gs;
+ uint64_t r15;
+ uint64_t r14;
+ uint64_t r13;
+ uint64_t r12;
+ uint64_t bp;
+ uint64_t bx;
+ uint64_t r11;
+ uint64_t r10;
+ uint64_t r9;
+ uint64_t r8;
+ uint64_t ax;
+ uint64_t cx;
+ uint64_t dx;
+ uint64_t si;
+ uint64_t di;
+ uint64_t orig_ax;
+ uint64_t ip;
+ uint64_t cs;
+ uint64_t flags;
+ uint64_t sp;
+ uint64_t ss;
+ uint64_t fs_base;
+ uint64_t gs_base;
+ uint64_t ds;
+ uint64_t es;
+ uint64_t fs;
+ uint64_t gs;
} user_regs_struct64;
typedef struct {
- uint32_t bx;
- uint32_t cx;
- uint32_t dx;
- uint32_t si;
- uint32_t di;
- uint32_t bp;
- uint32_t ax;
- uint32_t ds;
- uint32_t es;
- uint32_t fs;
- uint32_t gs;
- uint32_t orig_ax;
- uint32_t ip;
- uint32_t cs;
- uint32_t flags;
- uint32_t sp;
- uint32_t ss;
+ uint32_t bx;
+ uint32_t cx;
+ uint32_t dx;
+ uint32_t si;
+ uint32_t di;
+ uint32_t bp;
+ uint32_t ax;
+ uint32_t ds;
+ uint32_t es;
+ uint32_t fs;
+ uint32_t gs;
+ uint32_t orig_ax;
+ uint32_t ip;
+ uint32_t cs;
+ uint32_t flags;
+ uint32_t sp;
+ uint32_t ss;
} user_regs_struct32;
/*
@@ -73,22 +96,17 @@ typedef struct {
short __is_native; /* use user_regs_native macro to check it */
} user_regs_struct_t;
-#define NATIVE_MAGIC 0x0A
-#define COMPAT_MAGIC 0x0C
+#define NATIVE_MAGIC 0x0A
+#define COMPAT_MAGIC 0x0C
static inline bool user_regs_native(user_regs_struct_t *pregs)
{
return pregs->__is_native == NATIVE_MAGIC;
}
-#define get_user_reg(pregs, name) \
- ((user_regs_native(pregs)) ? \
- ((pregs)->native.name) : \
- ((pregs)->compat.name))
+#define get_user_reg(pregs, name) ((user_regs_native(pregs)) ? ((pregs)->native.name) : ((pregs)->compat.name))
-#define set_user_reg(pregs, name, val) \
- ((user_regs_native(pregs)) ? \
- ((pregs)->native.name = (val)) : \
- ((pregs)->compat.name = (val)))
+#define set_user_reg(pregs, name, val) \
+ ((user_regs_native(pregs)) ? ((pregs)->native.name = (val)) : ((pregs)->compat.name = (val)))
#if 0
typedef struct {
@@ -109,12 +127,13 @@ typedef struct {
typedef struct xsave_struct user_fpregs_struct_t;
-#define REG_RES(regs) get_user_reg(&regs, ax)
-#define REG_IP(regs) get_user_reg(&regs, ip)
-#define REG_SP(regs) get_user_reg(&regs, sp)
-#define REG_SYSCALL_NR(regs) get_user_reg(&regs, orig_ax)
+/* Each accessor takes the register set itself and passes its address to get_user_reg()/set_user_reg(). */
+#define REG_RES(regs) get_user_reg(&regs, ax)
+#define REG_IP(regs) get_user_reg(&regs, ip)
+#define SET_REG_IP(regs, val) set_user_reg(&regs, ip, val)
+#define REG_SP(regs) get_user_reg(&regs, sp)
+#define REG_SYSCALL_NR(regs) get_user_reg(&regs, orig_ax)
-#define __NR(syscall, compat) ((compat) ? __NR32_##syscall : __NR_##syscall)
+#define __NR(syscall, compat) ((compat) ? __NR32_##syscall : __NR_##syscall)
/*
* For x86_32 __NR_mmap inside the kernel represents old_mmap system
@@ -124,4 +143,11 @@ typedef struct xsave_struct user_fpregs_struct_t;
*/
#define __NR32_mmap __NR32_mmap2
+extern bool __compel_shstk_enabled(user_fpregs_struct_t *ext_regs);
+#define compel_shstk_enabled __compel_shstk_enabled
+
+extern int __parasite_setup_shstk(struct parasite_ctl *ctl,
+ user_fpregs_struct_t *ext_regs);
+#define parasite_setup_shstk __parasite_setup_shstk
+
#endif /* UAPI_COMPEL_ASM_TYPES_H__ */
diff --git a/compel/arch/x86/src/lib/include/uapi/asm/processor-flags.h b/compel/arch/x86/src/lib/include/uapi/asm/processor-flags.h
index 9f1bccdbe..caa784557 100644
--- a/compel/arch/x86/src/lib/include/uapi/asm/processor-flags.h
+++ b/compel/arch/x86/src/lib/include/uapi/asm/processor-flags.h
@@ -7,7 +7,7 @@
* EFLAGS bits
*/
#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
-#define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */
+#define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */
#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
@@ -16,7 +16,7 @@
#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
-#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
+#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
diff --git a/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h b/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h
index 51ca023f7..4a2e67559 100644
--- a/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h
+++ b/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h
@@ -11,60 +11,60 @@
#define SIGFRAME_MAX_OFFSET 8
struct rt_sigcontext {
- uint64_t r8;
- uint64_t r9;
- uint64_t r10;
- uint64_t r11;
- uint64_t r12;
- uint64_t r13;
- uint64_t r14;
- uint64_t r15;
- uint64_t rdi;
- uint64_t rsi;
- uint64_t rbp;
- uint64_t rbx;
- uint64_t rdx;
- uint64_t rax;
- uint64_t rcx;
- uint64_t rsp;
- uint64_t rip;
- uint64_t eflags;
- uint16_t cs;
- uint16_t gs;
- uint16_t fs;
- uint16_t ss;
- uint64_t err;
- uint64_t trapno;
- uint64_t oldmask;
- uint64_t cr2;
- uint64_t fpstate;
- uint64_t reserved1[8];
+ uint64_t r8;
+ uint64_t r9;
+ uint64_t r10;
+ uint64_t r11;
+ uint64_t r12;
+ uint64_t r13;
+ uint64_t r14;
+ uint64_t r15;
+ uint64_t rdi;
+ uint64_t rsi;
+ uint64_t rbp;
+ uint64_t rbx;
+ uint64_t rdx;
+ uint64_t rax;
+ uint64_t rcx;
+ uint64_t rsp;
+ uint64_t rip;
+ uint64_t eflags;
+ uint16_t cs;
+ uint16_t gs;
+ uint16_t fs;
+ uint16_t ss;
+ uint64_t err;
+ uint64_t trapno;
+ uint64_t oldmask;
+ uint64_t cr2;
+ uint64_t fpstate;
+ uint64_t reserved1[8];
};
struct rt_sigcontext_32 {
- uint32_t gs;
- uint32_t fs;
- uint32_t es;
- uint32_t ds;
- uint32_t di;
- uint32_t si;
- uint32_t bp;
- uint32_t sp;
- uint32_t bx;
- uint32_t dx;
- uint32_t cx;
- uint32_t ax;
- uint32_t trapno;
- uint32_t err;
- uint32_t ip;
- uint32_t cs;
- uint32_t flags;
- uint32_t sp_at_signal;
- uint32_t ss;
+ uint32_t gs;
+ uint32_t fs;
+ uint32_t es;
+ uint32_t ds;
+ uint32_t di;
+ uint32_t si;
+ uint32_t bp;
+ uint32_t sp;
+ uint32_t bx;
+ uint32_t dx;
+ uint32_t cx;
+ uint32_t ax;
+ uint32_t trapno;
+ uint32_t err;
+ uint32_t ip;
+ uint32_t cs;
+ uint32_t flags;
+ uint32_t sp_at_signal;
+ uint32_t ss;
- uint32_t fpstate;
- uint32_t oldmask;
- uint32_t cr2;
+ uint32_t fpstate;
+ uint32_t oldmask;
+ uint32_t cr2;
};
#include
@@ -74,71 +74,70 @@ struct rt_sigcontext_32 {
* when (if) other architectures will support compatible C/R
*/
-typedef uint32_t compat_uptr_t;
-typedef uint32_t compat_size_t;
-typedef uint32_t compat_sigset_word;
+typedef uint32_t compat_uptr_t;
+typedef uint32_t compat_size_t;
+typedef uint32_t compat_sigset_word;
typedef struct compat_siginfo {
- int si_signo;
- int si_errno;
- int si_code;
- int _pad[128/sizeof(int) - 3];
+ int si_signo;
+ int si_errno;
+ int si_code;
+ int _pad[128 / sizeof(int) - 3];
} compat_siginfo_t;
typedef struct compat_sigaltstack {
- compat_uptr_t ss_sp;
- int ss_flags;
- compat_size_t ss_size;
+ compat_uptr_t ss_sp;
+ int ss_flags;
+ compat_size_t ss_size;
} compat_stack_t;
-#define _COMPAT_NSIG 64
-#define _COMPAT_NSIG_BPW 32
-#define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW)
+#define _COMPAT_NSIG 64
+#define _COMPAT_NSIG_BPW 32
+#define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW)
typedef struct {
- compat_sigset_word sig[_COMPAT_NSIG_WORDS];
+ compat_sigset_word sig[_COMPAT_NSIG_WORDS];
} compat_sigset_t;
struct ucontext_ia32 {
- unsigned int uc_flags;
- unsigned int uc_link;
- compat_stack_t uc_stack;
- struct rt_sigcontext_32 uc_mcontext;
- compat_sigset_t uc_sigmask; /* mask last for extensibility */
+ unsigned int uc_flags;
+ unsigned int uc_link;
+ compat_stack_t uc_stack;
+ struct rt_sigcontext_32 uc_mcontext;
+ compat_sigset_t uc_sigmask; /* mask last for extensibility */
};
struct rt_sigframe_ia32 {
- uint32_t pretcode;
- int32_t sig;
- uint32_t pinfo;
- uint32_t puc;
- compat_siginfo_t info;
- struct ucontext_ia32 uc;
- char retcode[8];
+ uint32_t pretcode;
+ int32_t sig;
+ uint32_t pinfo;
+ uint32_t puc;
+ compat_siginfo_t info;
+ struct ucontext_ia32 uc;
+ char retcode[8];
/* fp state follows here */
- fpu_state_t fpu_state;
+ fpu_state_t fpu_state;
};
struct rt_sigframe_64 {
- char *pretcode;
- struct rt_ucontext uc;
- struct rt_siginfo info;
+ char *pretcode;
+ struct rt_ucontext uc;
+ struct rt_siginfo info;
/* fp state follows here */
- fpu_state_t fpu_state;
+ fpu_state_t fpu_state;
};
struct rt_sigframe {
union {
- struct rt_sigframe_ia32 compat;
- struct rt_sigframe_64 native;
+ struct rt_sigframe_ia32 compat;
+ struct rt_sigframe_64 native;
};
bool is_native;
};
-static inline
-void rt_sigframe_copy_sigset(struct rt_sigframe *to, k_rtsigset_t *from)
+static inline void rt_sigframe_copy_sigset(struct rt_sigframe *to, k_rtsigset_t *from)
{
size_t sz = sizeof(k_rtsigset_t);
@@ -149,8 +148,7 @@ void rt_sigframe_copy_sigset(struct rt_sigframe *to, k_rtsigset_t *from)
memcpy(&to->compat.uc.uc_sigmask, from, sz);
}
-static inline
-void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe)
+static inline void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe)
{
size_t sz = sizeof(k_rtsigset_t);
@@ -160,15 +158,11 @@ void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe)
memset(&sigframe->compat.uc.uc_sigmask, 0, sz);
}
-#define RT_SIGFRAME_REGIP(rt_sigframe) \
- ((rt_sigframe->is_native) ? \
- (rt_sigframe)->native.uc.uc_mcontext.rip : \
- (rt_sigframe)->compat.uc.uc_mcontext.ip)
+#define RT_SIGFRAME_REGIP(rt_sigframe) \
+ (((rt_sigframe)->is_native) ? (rt_sigframe)->native.uc.uc_mcontext.rip : (rt_sigframe)->compat.uc.uc_mcontext.ip)
-#define RT_SIGFRAME_FPU(rt_sigframe) \
- ((rt_sigframe->is_native) ? \
- (&(rt_sigframe)->native.fpu_state) : \
- (&(rt_sigframe)->compat.fpu_state))
+#define RT_SIGFRAME_FPU(rt_sigframe) \
+ (((rt_sigframe)->is_native) ? (&(rt_sigframe)->native.fpu_state) : (&(rt_sigframe)->compat.fpu_state))
#define RT_SIGFRAME_HAS_FPU(rt_sigframe) (RT_SIGFRAME_FPU(rt_sigframe)->has_fpu)
@@ -178,9 +172,28 @@ void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe)
* - compatible is in sys32_rt_sigreturn at arch/x86/ia32/ia32_signal.c
* - native is in sys_rt_sigreturn at arch/x86/kernel/signal.c
*/
-#define RT_SIGFRAME_OFFSET(rt_sigframe) (((rt_sigframe)->is_native) ? 8 : 4 )
+#define RT_SIGFRAME_OFFSET(rt_sigframe) (((rt_sigframe)->is_native) ? 8 : 4)
-#define USER32_CS 0x23
+#define USER32_CS 0x23
+
+/* clang-format off */
+/*
+ * rst_sigreturn in restorer is a non-inline call which adds an entry to the
+ * shadow stack above the sigframe token;
+ * if shadow stack is enabled, increment the shadow stack pointer to remove
+ * that entry
+ */
+#define ARCH_SHSTK_POP() \
+ asm volatile( \
+ "xor %%rax, %%rax\n" \
+ "rdsspq %%rax\n" \
+ "cmpq $0, %%rax\n" \
+ "jz 1f\n" \
+ "movq $1, %%rax\n" \
+ "incsspq %%rax\n" \
+ "1:\n" \
+ : : \
+ : "rax")
#define ARCH_RT_SIGRETURN_NATIVE(new_sp) \
asm volatile( \
@@ -194,7 +207,9 @@ void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe)
#define ARCH_RT_SIGRETURN_COMPAT(new_sp) \
asm volatile( \
"pushq $"__stringify(USER32_CS)" \n" \
- "pushq $1f \n" \
+ "xor %%rax, %%rax \n" \
+ "movl $1f, %%eax \n" \
+ "pushq %%rax \n" \
"lretq \n" \
"1: \n" \
".code32 \n" \
@@ -206,13 +221,23 @@ void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe)
: "rdi"(new_sp) \
: "eax", "r8", "r9", "r10", "r11", "memory")
-#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe) \
+#define ARCH_RT_SIGRETURN_RST(new_sp, rt_sigframe) \
+do { \
+ if ((rt_sigframe)->is_native) { \
+ ARCH_SHSTK_POP(); \
+ ARCH_RT_SIGRETURN_NATIVE(new_sp); \
+ } else \
+ ARCH_RT_SIGRETURN_COMPAT(new_sp); \
+} while (0)
+
+#define ARCH_RT_SIGRETURN_DUMP(new_sp, rt_sigframe) \
do { \
if ((rt_sigframe)->is_native) \
- ARCH_RT_SIGRETURN_NATIVE(new_sp); \
+ return new_sp; \
else \
ARCH_RT_SIGRETURN_COMPAT(new_sp); \
} while (0)
+/* clang-format on */
int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe,
struct rt_sigframe *rsigframe);
diff --git a/compel/arch/x86/src/lib/infect.c b/compel/arch/x86/src/lib/infect.c
index 11e7f4c91..afcf2c53b 100644
--- a/compel/arch/x86/src/lib/infect.c
+++ b/compel/arch/x86/src/lib/infect.c
@@ -3,6 +3,9 @@
#include
#include
#include
+#include
+#include
+#include
#include
@@ -21,29 +24,37 @@
#include "log.h"
#ifndef NT_X86_XSTATE
-#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
+#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
#endif
+
+#ifndef NT_X86_SHSTK
+#define NT_X86_SHSTK 0x204 /* x86 shstk state */
+#endif
+
+#ifndef ARCH_SHSTK_STATUS
+#define ARCH_SHSTK_STATUS 0x5005
+#define ARCH_SHSTK_SHSTK (1ULL << 0)
+#endif
+
#ifndef NT_PRSTATUS
-#define NT_PRSTATUS 1 /* Contains copy of prstatus struct */
+#define NT_PRSTATUS 1 /* Contains copy of prstatus struct */
#endif
/*
* Injected syscall instruction
*/
const char code_syscall[] = {
- 0x0f, 0x05, /* syscall */
- 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc /* int 3, ... */
+ 0x0f, 0x05, /* syscall */
+ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc /* int 3, ... */
};
const char code_int_80[] = {
- 0xcd, 0x80, /* int $0x80 */
- 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc /* int 3, ... */
+ 0xcd, 0x80, /* int $0x80 */
+ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc /* int 3, ... */
};
-static const int
-code_syscall_aligned = round_up(sizeof(code_syscall), sizeof(long));
-static const int
-code_int_80_aligned = round_up(sizeof(code_syscall), sizeof(long));
+static const int code_syscall_aligned = round_up(sizeof(code_syscall), sizeof(long));
+static const int code_int_80_aligned = round_up(sizeof(code_int_80), sizeof(long));
static inline __always_unused void __check_code_syscall(void)
{
@@ -54,22 +65,22 @@ static inline __always_unused void __check_code_syscall(void)
/* 10-byte legacy floating point register */
struct fpreg {
- uint16_t significand[4];
- uint16_t exponent;
+ uint16_t significand[4];
+ uint16_t exponent;
};
/* 16-byte floating point register */
struct fpxreg {
- uint16_t significand[4];
- uint16_t exponent;
- uint16_t padding[3];
+ uint16_t significand[4];
+ uint16_t exponent;
+ uint16_t padding[3];
};
-#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16)
-#define FP_EXP_TAG_VALID 0
-#define FP_EXP_TAG_ZERO 1
-#define FP_EXP_TAG_SPECIAL 2
-#define FP_EXP_TAG_EMPTY 3
+#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n)*16)
+#define FP_EXP_TAG_VALID 0
+#define FP_EXP_TAG_ZERO 1
+#define FP_EXP_TAG_SPECIAL 2
+#define FP_EXP_TAG_EMPTY 3
static inline uint32_t twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
{
@@ -89,9 +100,7 @@ static inline uint32_t twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
tag = FP_EXP_TAG_SPECIAL;
break;
case 0x0000:
- if (!st->significand[0] &&
- !st->significand[1] &&
- !st->significand[2] &&
+ if (!st->significand[0] && !st->significand[1] && !st->significand[2] &&
!st->significand[3])
tag = FP_EXP_TAG_ZERO;
else
@@ -112,8 +121,7 @@ static inline uint32_t twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
return ret;
}
-void compel_convert_from_fxsr(struct user_i387_ia32_struct *env,
- struct i387_fxsave_struct *fxsave)
+void compel_convert_from_fxsr(struct user_i387_ia32_struct *env, struct i387_fxsave_struct *fxsave)
{
struct fpxreg *from = (struct fpxreg *)&fxsave->st_space[0];
struct fpreg *to = (struct fpreg *)env->st_space;
@@ -137,16 +145,12 @@ void compel_convert_from_fxsr(struct user_i387_ia32_struct *env,
memcpy(&to[i], &from[i], sizeof(to[0]));
}
-int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe,
- user_regs_struct_t *regs,
- user_fpregs_struct_t *fpregs)
+int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
{
bool is_native = user_regs_native(regs);
- fpu_state_t *fpu_state = is_native ?
- &sigframe->native.fpu_state :
- &sigframe->compat.fpu_state;
+ fpu_state_t *fpu_state = is_native ? &sigframe->native.fpu_state : &sigframe->compat.fpu_state;
if (is_native) {
-#define cpreg64_native(d, s) sigframe->native.uc.uc_mcontext.d = regs->native.s
+#define cpreg64_native(d, s) sigframe->native.uc.uc_mcontext.d = regs->native.s
cpreg64_native(rdi, di);
cpreg64_native(rsi, si);
cpreg64_native(rbp, bp);
@@ -170,7 +174,7 @@ int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe,
sigframe->is_native = true;
#undef cpreg64_native
} else {
-#define cpreg32_compat(d) sigframe->compat.uc.uc_mcontext.d = regs->compat.d
+#define cpreg32_compat(d) sigframe->compat.uc.uc_mcontext.d = regs->compat.d
cpreg32_compat(gs);
cpreg32_compat(fs);
cpreg32_compat(es);
@@ -203,34 +207,38 @@ int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe,
return 0;
}
-int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe,
- struct rt_sigframe *rsigframe)
+int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe)
{
- fpu_state_t *fpu_state = (sigframe->is_native) ?
- &rsigframe->native.fpu_state :
- &rsigframe->compat.fpu_state;
+ fpu_state_t *fpu_state = (sigframe->is_native) ? &rsigframe->native.fpu_state : &rsigframe->compat.fpu_state;
if (sigframe->is_native) {
unsigned long addr = (unsigned long)(void *)&fpu_state->fpu_state_64.xsave;
if ((addr % 64ul)) {
- pr_err("Unaligned address passed: %lx (native %d)\n",
- addr, sigframe->is_native);
+ pr_err("Unaligned address passed: %lx (native %d)\n", addr, sigframe->is_native);
return -1;
}
sigframe->native.uc.uc_mcontext.fpstate = (uint64_t)addr;
} else if (!sigframe->is_native) {
- sigframe->compat.uc.uc_mcontext.fpstate =
- (uint32_t)(unsigned long)(void *)&fpu_state->fpu_state_ia32;
+ sigframe->compat.uc.uc_mcontext.fpstate = (uint32_t)(unsigned long)(void *)&fpu_state->fpu_state_ia32;
}
return 0;
}
-#define get_signed_user_reg(pregs, name) \
- ((user_regs_native(pregs)) ? (int64_t)((pregs)->native.name) : \
- (int32_t)((pregs)->compat.name))
+#define get_signed_user_reg(pregs, name) \
+ ((user_regs_native(pregs)) ? (int64_t)((pregs)->native.name) : (int32_t)((pregs)->compat.name))
+
+static int get_task_fpregs(pid_t pid, user_fpregs_struct_t *xsave)
+{
+ if (ptrace(PTRACE_GETFPREGS, pid, NULL, xsave)) {
+ pr_perror("Can't obtain FPU registers for %d", pid);
+ return -1;
+ }
+
+ return 0;
+}
static int get_task_xsave(pid_t pid, user_fpregs_struct_t *xsave)
{
@@ -244,27 +252,157 @@ static int get_task_xsave(pid_t pid, user_fpregs_struct_t *xsave)
return -1;
}
- return 0;
-}
+ if ((xsave->xsave_hdr.xstate_bv & 3) != 3) {
+ // Due to init-optimisation [1] x87 FPU or SSE state may not be filled in.
+ // Since those are restored unconditionally, make sure the init values are
+ // filled by retrying with old PTRACE_GETFPREGS.
+ //
+ // [1] Intel® 64 and IA-32 Architectures Software Developer's
+ // Manual Volume 1: Basic Architecture
+ // Section 13.6: Processor tracking of XSAVE-managed state
+ if (get_task_fpregs(pid, xsave))
+ return -1;
+ }
-static int get_task_fpregs(pid_t pid, user_fpregs_struct_t *xsave)
-{
- if (ptrace(PTRACE_GETFPREGS, pid, NULL, xsave)) {
- pr_perror("Can't obtain FPU registers for %d", pid);
- return -1;
+ /*
+ * xsave may be on stack, if we don't clear it explicitly we get
+ * funky shadow stack state
+ */
+ memset(&xsave->cet, 0, sizeof(xsave->cet));
+ if (compel_cpu_has_feature(X86_FEATURE_SHSTK)) {
+ unsigned long ssp = 0;
+ unsigned long features = 0;
+
+ if (ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long)&features, ARCH_SHSTK_STATUS)) {
+ /*
+ * kernels that don't support shadow stack return
+ * -EINVAL
+ */
+ if (errno == EINVAL)
+ return 0;
+
+ pr_perror("shstk: can't get shadow stack status for %d", pid);
+ return -1;
+ }
+
+ if (!(features & ARCH_SHSTK_SHSTK))
+ return 0;
+
+ iov.iov_base = &ssp;
+ iov.iov_len = sizeof(ssp);
+
+ if (ptrace(PTRACE_GETREGSET, pid, (unsigned int)NT_X86_SHSTK, &iov) < 0) {
+ /* ENODEV means CET is not supported by the CPU */
+ if (errno != ENODEV) {
+ pr_perror("shstk: can't get SSP for %d", pid);
+ return -1;
+ }
+ }
+
+ xsave->cet.cet = features;
+ xsave->cet.ssp = ssp;
+
+ pr_debug("%d: shstk: cet: %lx ssp: %lx\n", pid, xsave->cet.cet, xsave->cet.ssp);
}
return 0;
}
-int get_task_regs(pid_t pid, user_regs_struct_t *regs, save_regs_t save,
- void *arg, unsigned long flags)
+static inline void fixup_mxcsr(struct xsave_struct *xsave)
+{
+ /*
+ * Right now xsave->i387.mxcsr is filled with random garbage,
+ * let's make it valid by applying mask which allows all
+ * features, except the denormals-are-zero feature bit.
+ *
+ * See also fpu__init_system_mxcsr function:
+ * https://github.com/torvalds/linux/blob/8cb1ae19/arch/x86/kernel/fpu/init.c#L117
+ */
+ xsave->i387.mxcsr &= 0x0000ffbf;
+}
+
+/* See arch/x86/kernel/fpu/xstate.c */
+static void validate_random_xstate(struct xsave_struct *xsave)
+{
+ struct xsave_hdr_struct *hdr = &xsave->xsave_hdr;
+ unsigned int i;
+
+ /* No unknown or supervisor features may be set */
+ hdr->xstate_bv &= XFEATURE_MASK_USER;
+ hdr->xstate_bv &= ~XFEATURE_MASK_SUPERVISOR;
+ hdr->xstate_bv &= XFEATURE_MASK_FAULTINJ;
+
+ for (i = 0; i < XFEATURE_MAX; i++) {
+ if (!compel_fpu_has_feature(i))
+ hdr->xstate_bv &= ~(1 << i);
+ }
+
+ /* Userspace must use the uncompacted format */
+ hdr->xcomp_bv = 0;
+
+ /*
+ * If 'reserved' is shrunken to add a new field, make sure to validate
+ * that new field here!
+ */
+ BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
+
+ /* No reserved bits may be set */
+ memset(&hdr->reserved, 0, sizeof(hdr->reserved));
+}
+
+/*
+ * TODO: Put fault-injection under CONFIG_* and move
+ * extended regset corruption to generic code
+ */
+static int corrupt_extregs(pid_t pid)
+{
+ bool use_xsave = compel_cpu_has_feature(X86_FEATURE_OSXSAVE);
+ user_fpregs_struct_t ext_regs;
+ int *rand_to = (int *)&ext_regs;
+ unsigned int seed, init_seed;
+ size_t i;
+
+ init_seed = seed = time(NULL);
+ for (i = 0; i < sizeof(ext_regs) / sizeof(int); i++)
+ *rand_to++ = rand_r(&seed);
+
+ /*
+ * Error log-level as:
+ * - not intended to be used outside of testing;
+ * - zdtm.py will grep it auto-magically from logs
+ * (and the seed will be known from automatic testing).
+ */
+ pr_err("Corrupting %s for %d, seed %u\n", use_xsave ? "xsave" : "fpuregs", pid, init_seed);
+
+ fixup_mxcsr(&ext_regs);
+
+ if (!use_xsave) {
+ if (ptrace(PTRACE_SETFPREGS, pid, NULL, &ext_regs)) {
+ pr_perror("Can't set FPU registers for %d", pid);
+ return -1;
+ }
+ } else {
+ struct iovec iov;
+
+ validate_random_xstate((void *)&ext_regs);
+
+ iov.iov_base = &ext_regs;
+ iov.iov_len = sizeof(ext_regs);
+
+ if (ptrace(PTRACE_SETREGSET, pid, (unsigned int)NT_X86_XSTATE, &iov) < 0) {
+ pr_perror("Can't set xstate for %d", pid);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *xs, save_regs_t save,
+ void *arg, unsigned long flags)
{
- user_fpregs_struct_t xsave = { }, *xs = NULL;
int ret = -1;
- pr_info("Dumping general registers for %d in %s mode\n", pid,
- user_regs_native(regs) ? "native" : "compat");
+ pr_info("Dumping general registers for %d in %s mode\n", pid, user_regs_native(regs) ? "native" : "compat");
/* Did we come from a system call? */
if (get_signed_user_reg(regs, orig_ax) >= 0) {
@@ -288,43 +426,65 @@ int get_task_regs(pid_t pid, user_regs_struct_t *regs, save_regs_t save,
/*
* FPU fetched either via fxsave or via xsave,
- * thus decode it accrodingly.
+ * thus decode it accordingly.
*/
pr_info("Dumping GP/FPU registers for %d\n", pid);
if (!compel_cpu_has_feature(X86_FEATURE_OSXSAVE)) {
- ret = get_task_fpregs(pid, &xsave);
+ ret = get_task_fpregs(pid, xs);
} else if (unlikely(flags & INFECT_X86_PTRACE_MXCSR_BUG)) {
/*
* get_task_fpregs() will fill FP state,
* get_task_xsave() will overwrite rightly sse/mmx/etc
*/
pr_warn("Skylake xsave fpu bug workaround used\n");
- ret = get_task_fpregs(pid, &xsave);
+ ret = get_task_fpregs(pid, xs);
if (!ret)
- ret = get_task_xsave(pid, &xsave);
+ ret = get_task_xsave(pid, xs);
} else {
- ret = get_task_xsave(pid, &xsave);
+ ret = get_task_xsave(pid, xs);
}
+ if (!ret && unlikely(flags & INFECT_CORRUPT_EXTREGS))
+ ret = corrupt_extregs(pid);
+
if (ret)
goto err;
- xs = &xsave;
out:
- ret = save(arg, regs, xs);
+ ret = save(pid, arg, regs, xs);
err:
return ret;
}
-int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret,
- unsigned long arg1,
- unsigned long arg2,
- unsigned long arg3,
- unsigned long arg4,
- unsigned long arg5,
- unsigned long arg6)
+int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
+{
+ struct iovec iov;
+
+ pr_info("Restoring GP/FPU registers for %d\n", pid);
+
+ if (!compel_cpu_has_feature(X86_FEATURE_OSXSAVE)) {
+ if (ptrace(PTRACE_SETFPREGS, pid, NULL, ext_regs)) {
+ pr_perror("Can't set FPU registers for %d", pid);
+ return -1;
+ }
+ return 0;
+ }
+
+ iov.iov_base = ext_regs;
+ iov.iov_len = sizeof(*ext_regs);
+
+ if (ptrace(PTRACE_SETREGSET, pid, (unsigned int)NT_X86_XSTATE, &iov) < 0) {
+ pr_perror("Can't set FPU registers for %d", pid);
+ return -1;
+ }
+
+ return 0;
+}
+
+int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6)
{
user_regs_struct_t regs = ctl->orig.regs;
bool native = user_regs_native(®s);
@@ -333,51 +493,47 @@ int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret,
if (native) {
user_regs_struct64 *r = ®s.native;
- r->ax = (uint64_t)nr;
- r->di = arg1;
- r->si = arg2;
- r->dx = arg3;
+ r->ax = (uint64_t)nr;
+ r->di = arg1;
+ r->si = arg2;
+ r->dx = arg3;
r->r10 = arg4;
- r->r8 = arg5;
- r->r9 = arg6;
+ r->r8 = arg5;
+ r->r9 = arg6;
err = compel_execute_syscall(ctl, ®s, code_syscall);
} else {
user_regs_struct32 *r = ®s.compat;
- r->ax = (uint32_t)nr;
- r->bx = arg1;
- r->cx = arg2;
- r->dx = arg3;
- r->si = arg4;
- r->di = arg5;
- r->bp = arg6;
+ r->ax = (uint32_t)nr;
+ r->bx = arg1;
+ r->cx = arg2;
+ r->dx = arg3;
+ r->si = arg4;
+ r->di = arg5;
+ r->bp = arg6;
err = compel_execute_syscall(ctl, ®s, code_int_80);
}
- *ret = native ?
- (long)get_user_reg(®s, ax) :
- (int)get_user_reg(®s, ax);
+ *ret = native ? (long)get_user_reg(®s, ax) : (int)get_user_reg(®s, ax);
return err;
}
-void *remote_mmap(struct parasite_ctl *ctl,
- void *addr, size_t length, int prot,
- int flags, int fd, off_t offset)
+void *remote_mmap(struct parasite_ctl *ctl, void *addr, size_t length, int prot, int flags, int fd, off_t offset)
{
long map;
int err;
bool compat_task = !user_regs_native(&ctl->orig.regs);
- err = compel_syscall(ctl, __NR(mmap, compat_task), &map,
- (unsigned long)addr, length, prot, flags, fd, offset);
+ err = compel_syscall(ctl, __NR(mmap, compat_task), &map, (unsigned long)addr, length, prot, flags, fd, offset);
if (err < 0)
return NULL;
if (map == -EACCES && (prot & PROT_WRITE) && (prot & PROT_EXEC)) {
pr_warn("mmap(PROT_WRITE | PROT_EXEC) failed for %d, "
- "check selinux execmem policy\n", ctl->rpid);
+ "check selinux execmem policy\n",
+ ctl->rpid);
return NULL;
}
if (IS_ERR_VALUE(map)) {
@@ -402,18 +558,17 @@ void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *
{
set_user_reg(regs, ip, new_ip);
if (stack)
- set_user_reg(regs, sp, (unsigned long) stack);
+ set_user_reg(regs, sp, (unsigned long)stack);
/* Avoid end of syscall processing */
set_user_reg(regs, orig_ax, -1);
/* Make sure flags are in known state */
- set_user_reg(regs, flags, get_user_reg(regs, flags) &
- ~(X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_IF));
+ set_user_reg(regs, flags, get_user_reg(regs, flags) & ~(X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_IF));
}
-#define USER32_CS 0x23
-#define USER_CS 0x33
+#define USER32_CS 0x23
+#define USER_CS 0x33
static bool ldt_task_selectors(pid_t pid)
{
@@ -469,54 +624,58 @@ bool arch_can_dump_task(struct parasite_ctl *ctl)
int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s)
{
int native = compel_mode_native(ctl);
- void *where = native ?
- (void *)&s->native.uc.uc_stack :
- (void *)&s->compat.uc.uc_stack;
+ void *where = native ? (void *)&s->native.uc.uc_stack : (void *)&s->compat.uc.uc_stack;
long ret;
int err;
- err = compel_syscall(ctl, __NR(sigaltstack, !native),
- &ret, 0, (unsigned long)where,
- 0, 0, 0, 0);
+ err = compel_syscall(ctl, __NR(sigaltstack, !native), &ret, 0, (unsigned long)where, 0, 0, 0, 0);
return err ? err : ret;
}
/* Copied from the gdb header gdb/nat/x86-dregs.h */
/* Debug registers' indices. */
-#define DR_FIRSTADDR 0
-#define DR_LASTADDR 3
-#define DR_NADDR 4 /* The number of debug address registers. */
-#define DR_STATUS 6 /* Index of debug status register (DR6). */
-#define DR_CONTROL 7 /* Index of debug control register (DR7). */
+#define DR_FIRSTADDR 0
+#define DR_LASTADDR 3
+#define DR_NADDR 4 /* The number of debug address registers. */
+#define DR_STATUS 6 /* Index of debug status register (DR6). */
+#define DR_CONTROL 7 /* Index of debug control register (DR7). */
-#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit. */
-#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit. */
-#define DR_ENABLE_SIZE 2 /* Two enable bits per debug register. */
+#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit. */
+#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit. */
+#define DR_ENABLE_SIZE 2 /* Two enable bits per debug register. */
/* Locally enable the break/watchpoint in the I'th debug register. */
#define X86_DR_LOCAL_ENABLE(i) (1 << (DR_LOCAL_ENABLE_SHIFT + DR_ENABLE_SIZE * (i)))
int ptrace_set_breakpoint(pid_t pid, void *addr)
{
+ k_rtsigset_t block;
int ret;
/* Set a breakpoint */
- if (ptrace(PTRACE_POKEUSER, pid,
- offsetof(struct user, u_debugreg[DR_FIRSTADDR]),
- addr)) {
+ if (ptrace(PTRACE_POKEUSER, pid, offsetof(struct user, u_debugreg[DR_FIRSTADDR]), addr)) {
pr_perror("Unable to setup a breakpoint into %d", pid);
return -1;
}
/* Enable the breakpoint */
- if (ptrace(PTRACE_POKEUSER, pid,
- offsetof(struct user, u_debugreg[DR_CONTROL]),
- X86_DR_LOCAL_ENABLE(DR_FIRSTADDR))) {
+ if (ptrace(PTRACE_POKEUSER, pid, offsetof(struct user, u_debugreg[DR_CONTROL]),
+ X86_DR_LOCAL_ENABLE(DR_FIRSTADDR))) {
pr_perror("Unable to enable the breakpoint for %d", pid);
return -1;
}
+ /*
+ * FIXME(issues/1429): SIGTRAP can't be blocked, otherwise its handler
+ * will be reset to the default one.
+ */
+ ksigfillset(&block);
+ ksigdelset(&block, SIGTRAP);
+ if (ptrace(PTRACE_SETSIGMASK, pid, sizeof(k_rtsigset_t), &block)) {
+ pr_perror("Can't block signals for %d", pid);
+ return -1;
+ }
ret = ptrace(PTRACE_CONT, pid, NULL, NULL);
if (ret) {
pr_perror("Unable to restart the stopped tracee process %d", pid);
@@ -529,9 +688,7 @@ int ptrace_set_breakpoint(pid_t pid, void *addr)
int ptrace_flush_breakpoints(pid_t pid)
{
/* Disable the breakpoint */
- if (ptrace(PTRACE_POKEUSER, pid,
- offsetof(struct user, u_debugreg[DR_CONTROL]),
- 0)) {
+ if (ptrace(PTRACE_POKEUSER, pid, offsetof(struct user, u_debugreg[DR_CONTROL]), 0)) {
pr_perror("Unable to disable the breakpoint for %d", pid);
return -1;
}
@@ -563,8 +720,7 @@ int ptrace_get_regs(pid_t pid, user_regs_struct_t *regs)
}
pr_err("PTRACE_GETREGSET read %zu bytes for pid %d, but native/compat regs sizes are %zu/%zu bytes\n",
- iov.iov_len, pid,
- sizeof(regs->native), sizeof(regs->compat));
+ iov.iov_len, pid, sizeof(regs->native), sizeof(regs->compat));
return -1;
}
@@ -582,11 +738,70 @@ int ptrace_set_regs(pid_t pid, user_regs_struct_t *regs)
return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov);
}
-#define TASK_SIZE ((1UL << 47) - PAGE_SIZE)
+#define TASK_SIZE ((1UL << 47) - PAGE_SIZE)
/*
* Task size may be limited to 3G but we need a
* higher limit, because it's backward compatible.
*/
-#define TASK_SIZE_IA32 (0xffffe000)
+#define TASK_SIZE_IA32 (0xffffe000)
-unsigned long compel_task_size(void) { return TASK_SIZE; }
+unsigned long compel_task_size(void)
+{
+ return TASK_SIZE;
+}
+
+bool __compel_shstk_enabled(user_fpregs_struct_t *ext_regs)
+{
+ if (!compel_cpu_has_feature(X86_FEATURE_SHSTK))
+ return false;
+
+ if (ext_regs->cet.cet & ARCH_SHSTK_SHSTK)
+ return true;
+
+ return false;
+}
+
+int parasite_setup_shstk(struct parasite_ctl *ctl, __maybe_unused user_fpregs_struct_t *ext_regs)
+{
+ pid_t pid = ctl->rpid;
+ unsigned long sa_restorer = ctl->parasite_ip;
+ unsigned long long ssp = 0;
+ unsigned long token;
+ struct iovec iov;
+
+ if (!compel_shstk_enabled(ext_regs))
+ return 0;
+
+ iov.iov_base = &ssp;
+ iov.iov_len = sizeof(ssp);
+ if (ptrace(PTRACE_GETREGSET, pid, (unsigned int)NT_X86_SHSTK, &iov) < 0) {
+ /* ENODEV means CET is not supported by the CPU; ssp then stays 0 */
+ if (errno != ENODEV) {
+ pr_perror("shstk: %d: cannot get SSP", pid);
+ return -1;
+ }
+ }
+
+ /* The token is for 64-bit: 8-byte aligned SSP with bit 63 set */
+ token = ALIGN_DOWN(ssp, 8);
+ token |= (1UL << 63);
+ ssp = ALIGN_DOWN(ssp, 8) - 8;
+ if (ptrace(PTRACE_POKEDATA, pid, (void *)ssp, token)) {
+ pr_perror("shstk: %d: failed to inject shadow stack token", pid);
+ return -1;
+ }
+
+ ssp = ssp - sizeof(uint64_t);
+ if (ptrace(PTRACE_POKEDATA, pid, (void *)ssp, sa_restorer)) {
+ pr_perror("shstk: %d: failed to inject restorer address", pid);
+ return -1;
+ }
+
+ ssp = ssp + sizeof(uint64_t);
+ if (ptrace(PTRACE_SETREGSET, pid, (unsigned int)NT_X86_SHSTK, &iov) < 0) {
+ pr_perror("shstk: %d: cannot write SSP", pid);
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/compel/arch/x86/src/lib/thread_area.c b/compel/arch/x86/src/lib/thread_area.c
new file mode 100644
index 000000000..271d89dcd
--- /dev/null
+++ b/compel/arch/x86/src/lib/thread_area.c
@@ -0,0 +1,87 @@
+#include
+#include
+#include
+#include
+#include "log.h"
+#include "asm/infect-types.h"
+#include "infect.h"
+#include "infect-priv.h"
+
+#ifndef PTRACE_GET_THREAD_AREA
+#define PTRACE_GET_THREAD_AREA 25
+#endif
+
+/*
+ * For 64-bit applications, TLS (fs_base for Glibc) is in MSR,
+ * which are dumped with the help of ptrace() and restored with
+ * arch_prctl(ARCH_SET_FS/ARCH_SET_GS).
+ *
+ * But SET_FS_BASE will update GDT if base pointer fits in 4 bytes.
+ * Otherwise it will set only MSR, which allows for mixed 64/32-bit
+ * code to use: 2 MSRs as TLS base _and_ 3 GDT entries.
+ * That gives 5 TLS pointers in total, 3 of which are four bytes and
+ * the other two eight bytes:
+ * struct thread_struct {
+ * struct desc_struct tls_array[3];
+ * ...
+ * #ifdef CONFIG_X86_64
+ * unsigned long fsbase;
+ * unsigned long gsbase;
+ * #endif
+ * ...
+ * };
+ *
+ * Most x86_64 applications don't use GDT, but mixed code (i.e. Wine)
+ * can use it. Be pessimistic and dump it for 64-bit applications too.
+ */
+int __compel_arch_fetch_thread_area(int tid, struct thread_ctx *th)
+{
+ bool native_mode = user_regs_native(&th->regs);
+ tls_t *ptls = &th->tls;
+ int err, i;
+
+ /* Initialise as not present by default */
+ for (i = 0; i < GDT_ENTRY_TLS_NUM; i++) {
+ user_desc_t *d = &ptls->desc[i];
+
+ memset(d, 0, sizeof(user_desc_t));
+ d->seg_not_present = 1;
+ d->entry_number = GDT_ENTRY_TLS_MIN + i;
+ }
+
+ for (i = 0; i < GDT_ENTRY_TLS_NUM; i++) {
+ user_desc_t *d = &ptls->desc[i];
+
+ err = ptrace(PTRACE_GET_THREAD_AREA, tid, GDT_ENTRY_TLS_MIN + i, d);
+ if (err) {
+ /*
+ * Ignoring absent syscall on !CONFIG_IA32_EMULATION
+ * where such mixed code can't run.
+ * XXX: Add compile CONFIG_X86_IGNORE_64BIT_TLS
+ * (for x86_64 systems with CONFIG_IA32_EMULATION)
+ */
+ if (errno == EIO && native_mode)
+ return 0;
+
+ pr_perror("get_thread_area failed for %d", tid);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int compel_arch_fetch_thread_area(struct parasite_thread_ctl *tctl)
+{
+ return __compel_arch_fetch_thread_area(tctl->tid, &tctl->th);
+}
+
+void compel_arch_get_tls_task(struct parasite_ctl *ctl, tls_t *out)
+{
+ memcpy(out, &ctl->orig.tls, sizeof(tls_t));
+}
+
+void compel_arch_get_tls_thread(struct parasite_thread_ctl *tctl, tls_t *out)
+{
+ memcpy(out, &tctl->th.tls, sizeof(tls_t));
+}
diff --git a/compel/include/elf32-types.h b/compel/include/elf32-types.h
index b516ba17e..8f6c59960 100644
--- a/compel/include/elf32-types.h
+++ b/compel/include/elf32-types.h
@@ -1,16 +1,16 @@
#ifndef COMPEL_ELF32_TYPES_H__
#define COMPEL_ELF32_TYPES_H__
-#define Elf_Ehdr Elf32_Ehdr
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Rel Elf32_Rel
-#define Elf_Rela Elf32_Rela
+#define Elf_Ehdr Elf32_Ehdr
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define Elf_Rel Elf32_Rel
+#define Elf_Rela Elf32_Rela
-#define ELF_ST_TYPE ELF32_ST_TYPE
-#define ELF_ST_BIND ELF32_ST_BIND
+#define ELF_ST_TYPE ELF32_ST_TYPE
+#define ELF_ST_BIND ELF32_ST_BIND
-#define ELF_R_SYM ELF32_R_SYM
-#define ELF_R_TYPE ELF32_R_TYPE
+#define ELF_R_SYM ELF32_R_SYM
+#define ELF_R_TYPE ELF32_R_TYPE
#endif /* COMPEL_ELF32_TYPES_H__ */
diff --git a/compel/include/elf64-types.h b/compel/include/elf64-types.h
index c4d5f1c72..ce11a2a61 100644
--- a/compel/include/elf64-types.h
+++ b/compel/include/elf64-types.h
@@ -1,16 +1,16 @@
#ifndef COMPEL_ELF64_TYPES_H__
#define COMPEL_ELF64_TYPES_H__
-#define Elf_Ehdr Elf64_Ehdr
-#define Elf_Shdr Elf64_Shdr
-#define Elf_Sym Elf64_Sym
-#define Elf_Rel Elf64_Rel
-#define Elf_Rela Elf64_Rela
+#define Elf_Ehdr Elf64_Ehdr
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Sym Elf64_Sym
+#define Elf_Rel Elf64_Rel
+#define Elf_Rela Elf64_Rela
-#define ELF_ST_TYPE ELF64_ST_TYPE
-#define ELF_ST_BIND ELF64_ST_BIND
+#define ELF_ST_TYPE ELF64_ST_TYPE
+#define ELF_ST_BIND ELF64_ST_BIND
-#define ELF_R_SYM ELF64_R_SYM
-#define ELF_R_TYPE ELF64_R_TYPE
+#define ELF_R_SYM ELF64_R_SYM
+#define ELF_R_TYPE ELF64_R_TYPE
#endif /* COMPEL_ELF64_TYPES_H__ */
diff --git a/compel/include/errno.h b/compel/include/errno.h
index d41fd5391..b4ad1f086 100644
--- a/compel/include/errno.h
+++ b/compel/include/errno.h
@@ -1,9 +1,9 @@
#ifndef __COMPEL_ERRNO_H__
#define __COMPEL_ERRNO_H__
-#define ERESTARTSYS 512
-#define ERESTARTNOINTR 513
-#define ERESTARTNOHAND 514
-#define ERESTART_RESTARTBLOCK 516
+#define ERESTARTSYS 512
+#define ERESTARTNOINTR 513
+#define ERESTARTNOHAND 514
+#define ERESTART_RESTARTBLOCK 516
#endif /* __CR_ERRNO_H__ */
diff --git a/compel/include/infect-priv.h b/compel/include/infect-priv.h
index ec6dd455d..8e78a7f6c 100644
--- a/compel/include/infect-priv.h
+++ b/compel/include/infect-priv.h
@@ -3,69 +3,79 @@
#include
-#define BUILTIN_SYSCALL_SIZE 8
+#define BUILTIN_SYSCALL_SIZE 8
struct thread_ctx {
- k_rtsigset_t sigmask;
- user_regs_struct_t regs;
+ k_rtsigset_t sigmask;
+ user_regs_struct_t regs;
+#ifdef ARCH_HAS_PTRACE_GET_THREAD_AREA
+ tls_t tls;
+#endif
+ user_fpregs_struct_t ext_regs;
};
/* parasite control block */
struct parasite_ctl {
- int rpid; /* Real pid of the victim */
- void *remote_map;
- void *local_map;
- void *sigreturn_addr; /* A place for the breakpoint */
- unsigned long map_length;
+ int rpid; /* Real pid of the victim */
+ void *remote_map;
+ void *local_map;
+ void *sigreturn_addr; /* A place for the breakpoint */
+ unsigned long map_length;
- struct infect_ctx ictx;
+ struct infect_ctx ictx;
/* thread leader data */
- bool daemonized;
+ bool daemonized;
- struct thread_ctx orig;
+ struct thread_ctx orig;
- void *rstack; /* thread leader stack*/
- struct rt_sigframe *sigframe;
- struct rt_sigframe *rsigframe; /* address in a parasite */
+ void *rstack; /* thread leader stack*/
+ struct rt_sigframe *sigframe;
+ struct rt_sigframe *rsigframe; /* address in a parasite */
- void *r_thread_stack; /* stack for non-leader threads */
+ void *r_thread_stack; /* stack for non-leader threads */
- unsigned long parasite_ip; /* service routine start ip */
+ unsigned long parasite_ip; /* service routine start ip */
- unsigned int *addr_cmd; /* addr for command */
- void *addr_args; /* address for arguments */
- unsigned long args_size;
- int tsock; /* transport socket for transferring fds */
+ unsigned int *cmd; /* address for command */
+ void *args; /* address for arguments */
+ unsigned long args_size;
+ int tsock; /* transport socket for transferring fds */
struct parasite_blob_desc pblob;
};
struct parasite_thread_ctl {
- int tid;
- struct parasite_ctl *ctl;
- struct thread_ctx th;
+ int tid;
+ struct parasite_ctl *ctl;
+ struct thread_ctx th;
};
-#define MEMFD_FNAME "CRIUMFD"
-#define MEMFD_FNAME_SZ sizeof(MEMFD_FNAME)
+#define MEMFD_FNAME "CRIUMFD"
+#define MEMFD_FNAME_SZ sizeof(MEMFD_FNAME)
struct ctl_msg;
int parasite_wait_ack(int sockfd, unsigned int cmd, struct ctl_msg *m);
extern void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs);
-extern void *remote_mmap(struct parasite_ctl *ctl,
- void *addr, size_t length, int prot,
- int flags, int fd, off_t offset);
+extern void *remote_mmap(struct parasite_ctl *ctl, void *addr, size_t length, int prot, int flags, int fd,
+ off_t offset);
extern bool arch_can_dump_task(struct parasite_ctl *ctl);
-extern int get_task_regs(pid_t pid, user_regs_struct_t *regs, save_regs_t save,
- void *arg, unsigned long flags);
+/*
+ * @regs: general purpose registers
+ * @ext_regs: extended register set (fpu/mmx/sse/etc)
+ * NULL for a task whose extended registers are restored by sigframe on rt_sigreturn()
+ * @save: callback to dump all info
+ * @flags: see INFECT_* in infect_ctx::flags
+ * @pid: pid of the task to get registers from (TODO: confirm against callers)
+ */
+extern int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
+ void *arg, unsigned long flags);
+extern int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs);
+extern int compel_set_task_gcs_regs(pid_t pid, user_fpregs_struct_t *ext_regs);
extern int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s);
-extern int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe,
- user_regs_struct_t *regs,
+extern int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs,
user_fpregs_struct_t *fpregs);
-extern int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe,
- struct rt_sigframe *rsigframe);
-extern int compel_execute_syscall(struct parasite_ctl *ctl,
- user_regs_struct_t *regs, const char *code_syscall);
+extern int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe);
+extern int compel_execute_syscall(struct parasite_ctl *ctl, user_regs_struct_t *regs, const char *code_syscall);
#endif
diff --git a/compel/include/log.h b/compel/include/log.h
index 559f909ce..5250622c8 100644
--- a/compel/include/log.h
+++ b/compel/include/log.h
@@ -1,64 +1,53 @@
#ifndef COMPEL_LOG_H__
#define COMPEL_LOG_H__
-#include "uapi/compel/compel.h"
-#include "uapi/compel/loglevels.h"
+#include
+#include
+
+#include "uapi/compel/log.h"
#ifndef LOG_PREFIX
-# define LOG_PREFIX
+#define LOG_PREFIX
#endif
static inline int pr_quelled(unsigned int loglevel)
{
- return compel_log_get_loglevel() < loglevel
- && loglevel != COMPEL_LOG_MSG;
+ return compel_log_get_loglevel() < loglevel && loglevel != COMPEL_LOG_MSG;
}
-extern void compel_print_on_level(unsigned int loglevel,
- const char *format, ...)
- __attribute__ ((__format__ (__printf__, 2, 3)));
+extern void compel_print_on_level(unsigned int loglevel, const char *format, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
-#define pr_msg(fmt, ...) \
- compel_print_on_level(COMPEL_LOG_MSG, \
- fmt, ##__VA_ARGS__)
+#define pr_msg(fmt, ...) compel_print_on_level(COMPEL_LOG_MSG, fmt, ##__VA_ARGS__)
-#define pr_info(fmt, ...) \
- compel_print_on_level(COMPEL_LOG_INFO, \
- LOG_PREFIX fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...) compel_print_on_level(COMPEL_LOG_INFO, LOG_PREFIX fmt, ##__VA_ARGS__)
-#define pr_err(fmt, ...) \
- compel_print_on_level(COMPEL_LOG_ERROR, \
- "Error (%s:%d): " LOG_PREFIX fmt, \
- __FILE__, __LINE__, ##__VA_ARGS__)
+#define pr_err(fmt, ...) \
+ compel_print_on_level(COMPEL_LOG_ERROR, "Error (%s:%d): " LOG_PREFIX fmt, __FILE__, __LINE__, ##__VA_ARGS__)
-#define pr_err_once(fmt, ...) \
- do { \
- static bool __printed; \
- if (!__printed) { \
- pr_err(fmt, ##__VA_ARGS__); \
- __printed = 1; \
- } \
+#define pr_err_once(fmt, ...) \
+ do { \
+ static bool __printed; \
+ if (!__printed) { \
+ pr_err(fmt, ##__VA_ARGS__); \
+ __printed = 1; \
+ } \
} while (0)
-#define pr_warn(fmt, ...) \
- compel_print_on_level(COMPEL_LOG_WARN, \
- "Warn (%s:%d): " LOG_PREFIX fmt, \
- __FILE__, __LINE__, ##__VA_ARGS__)
+#define pr_warn(fmt, ...) \
+ compel_print_on_level(COMPEL_LOG_WARN, "Warn (%s:%d): " LOG_PREFIX fmt, __FILE__, __LINE__, ##__VA_ARGS__)
-#define pr_warn_once(fmt, ...) \
- do { \
- static bool __printed; \
- if (!__printed) { \
- pr_warn(fmt, ##__VA_ARGS__); \
- __printed = 1; \
- } \
+#define pr_warn_once(fmt, ...) \
+ do { \
+ static bool __printed; \
+ if (!__printed) { \
+ pr_warn(fmt, ##__VA_ARGS__); \
+ __printed = 1; \
+ } \
} while (0)
-#define pr_debug(fmt, ...) \
- compel_print_on_level(COMPEL_LOG_DEBUG, \
- LOG_PREFIX fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...) compel_print_on_level(COMPEL_LOG_DEBUG, LOG_PREFIX fmt, ##__VA_ARGS__)
-#define pr_perror(fmt, ...) \
- pr_err(fmt ": %m\n", ##__VA_ARGS__)
+#define pr_perror(fmt, ...) pr_err(fmt ": %s\n", ##__VA_ARGS__, strerror(errno))
#endif /* COMPEL_LOG_H__ */
diff --git a/compel/include/piegen.h b/compel/include/piegen.h
index fd72f9c22..d445d5349 100644
--- a/compel/include/piegen.h
+++ b/compel/include/piegen.h
@@ -9,19 +9,19 @@
#include "common/compiler.h"
typedef struct {
- char *input_filename;
- char *output_filename;
- char *prefix;
- FILE *fout;
+ char *input_filename;
+ char *output_filename;
+ char *prefix;
+ FILE *fout;
} piegen_opt_t;
extern piegen_opt_t opts;
-#define pr_out(fmt, ...) \
-do { \
- if (opts.fout) \
- fprintf(opts.fout, fmt, ##__VA_ARGS__); \
-} while (0)
+#define pr_out(fmt, ...) \
+ do { \
+ if (opts.fout) \
+ fprintf(opts.fout, fmt, ##__VA_ARGS__); \
+ } while (0)
extern int handle_binary(void *mem, size_t size);
diff --git a/compel/include/ptrace.h b/compel/include/ptrace.h
index 01f55c45a..00013f937 100644
--- a/compel/include/ptrace.h
+++ b/compel/include/ptrace.h
@@ -5,7 +5,9 @@
#include
#include
-#define PTRACE_SI_EVENT(_si_code) (((_si_code) & 0xFFFF) >> 8)
+#define PTRACE_SYSCALL_TRAP 0x80
+
+#define PTRACE_SI_EVENT(_si_code) (((_si_code)&0xFFFF) >> 8)
extern int ptrace_get_regs(pid_t pid, user_regs_struct_t *regs);
extern int ptrace_set_regs(pid_t pid, user_regs_struct_t *regs);
diff --git a/compel/include/rpc-pie-priv.h b/compel/include/rpc-pie-priv.h
index 15f5b14ca..5a6b337b2 100644
--- a/compel/include/rpc-pie-priv.h
+++ b/compel/include/rpc-pie-priv.h
@@ -1,16 +1,22 @@
#ifndef __COMPEL_RPC_H__
#define __COMPEL_RPC_H__
struct ctl_msg {
- uint32_t cmd; /* command itself */
- uint32_t ack; /* ack on command */
- int32_t err; /* error code on reply */
+ uint32_t cmd; /* command itself */
+ uint32_t ack; /* ack on command */
+ int32_t err; /* error code on reply */
};
-#define ctl_msg_cmd(_cmd) \
- (struct ctl_msg){.cmd = _cmd, }
+#define ctl_msg_cmd(_cmd) \
+ (struct ctl_msg) \
+ { \
+ .cmd = _cmd, \
+ }
-#define ctl_msg_ack(_cmd, _err) \
- (struct ctl_msg){.cmd = _cmd, .ack = _cmd, .err = _err, }
+#define ctl_msg_ack(_cmd, _err) \
+ (struct ctl_msg) \
+ { \
+ .cmd = _cmd, .ack = _cmd, .err = _err, \
+ }
/*
* NOTE: each command's args should be arch-independed sized.
@@ -18,7 +24,7 @@ struct ctl_msg {
* alternative type for compatible tasks in parasite-compat.h
*/
enum {
- PARASITE_CMD_IDLE = 0,
+ PARASITE_CMD_IDLE = 0,
PARASITE_CMD_ACK,
PARASITE_CMD_INIT_DAEMON,
@@ -32,19 +38,19 @@ enum {
};
struct parasite_init_args {
- int32_t h_addr_len;
- struct sockaddr_un h_addr;
- int32_t log_level;
- uint64_t sigreturn_addr;
- uint64_t sigframe; /* pointer to sigframe */
- futex_t daemon_connected;
+ int32_t h_addr_len;
+ struct sockaddr_un h_addr;
+ int32_t log_level;
+ uint64_t sigreturn_addr;
+ uint64_t sigframe; /* pointer to sigframe */
+ futex_t daemon_connected;
#ifdef ARCH_HAS_LONG_PAGES
- uint32_t page_size;
+ uint32_t page_size;
#endif
};
struct parasite_unmap_args {
- uint64_t parasite_start;
- uint64_t parasite_len;
+ uint64_t parasite_start;
+ uint64_t parasite_len;
};
#endif
diff --git a/compel/include/shmem.h b/compel/include/shmem.h
index b6f994617..a38599625 100644
--- a/compel/include/shmem.h
+++ b/compel/include/shmem.h
@@ -7,4 +7,3 @@ struct shmem_plugin_msg {
};
#endif /* __COMPEL_PLUGIN_SHMEM_PRIV_H__ */
-
diff --git a/compel/include/uapi/compel.h b/compel/include/uapi/compel.h
deleted file mode 100644
index 318a472da..000000000
--- a/compel/include/uapi/compel.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef UAPI_COMPEL_H__
-#define UAPI_COMPEL_H__
-
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-#endif /* UAPI_COMPEL_H__ */
diff --git a/compel/include/uapi/cpu.h b/compel/include/uapi/cpu.h
index 6f827d447..72c8a516c 100644
--- a/compel/include/uapi/cpu.h
+++ b/compel/include/uapi/cpu.h
@@ -6,7 +6,7 @@
#include
-extern int compel_cpuid(compel_cpuinfo_t *info);
+extern int /* TODO: __must_check */ compel_cpuid(compel_cpuinfo_t *info);
extern bool compel_cpu_has_feature(unsigned int feature);
extern bool compel_fpu_has_feature(unsigned int feature);
extern uint32_t compel_fpu_feature_size(unsigned int feature);
diff --git a/compel/include/uapi/handle-elf.h b/compel/include/uapi/handle-elf.h
index ddeecb0d5..3b6d5bfee 100644
--- a/compel/include/uapi/handle-elf.h
+++ b/compel/include/uapi/handle-elf.h
@@ -1,15 +1,22 @@
#ifndef __COMPEL_UAPI_HANDLE_ELF__
#define __COMPEL_UAPI_HANDLE_ELF__
-#define COMPEL_TYPE_INT (1u << 0)
-#define COMPEL_TYPE_LONG (1u << 1)
-#define COMPEL_TYPE_GOTPCREL (1u << 2)
-
+#define COMPEL_TYPE_INT (1u << 0)
+#define COMPEL_TYPE_LONG (1u << 1)
+#define COMPEL_TYPE_GOTPCREL (1u << 2)
+#ifdef CONFIG_MIPS
+#define COMPEL_TYPE_MIPS_26 (1u << 3)
+#define COMPEL_TYPE_MIPS_HI16 (1u << 4)
+#define COMPEL_TYPE_MIPS_LO16 (1u << 5)
+#define COMPEL_TYPE_MIPS_HIGHER (1u << 6)
+#define COMPEL_TYPE_MIPS_HIGHEST (1u << 7)
+#define COMPEL_TYPE_MIPS_64 (1u << 8)
+#endif
typedef struct {
- unsigned int offset;
- unsigned int type;
- long addend;
- long value;
+ unsigned int offset;
+ unsigned int type;
+ long addend;
+ long value;
} compel_reloc_t;
#endif
diff --git a/compel/include/uapi/infect-rpc.h b/compel/include/uapi/infect-rpc.h
index 0176c1142..d0f853859 100644
--- a/compel/include/uapi/infect-rpc.h
+++ b/compel/include/uapi/infect-rpc.h
@@ -5,13 +5,14 @@
#include
#include
+#include
+
struct parasite_ctl;
-extern int compel_rpc_sync(unsigned int cmd, struct parasite_ctl *ctl);
-extern int compel_rpc_call(unsigned int cmd, struct parasite_ctl *ctl);
-extern int compel_rpc_call_sync(unsigned int cmd, struct parasite_ctl *ctl);
+extern int __must_check compel_rpc_sync(unsigned int cmd, struct parasite_ctl *ctl);
+extern int __must_check compel_rpc_call(unsigned int cmd, struct parasite_ctl *ctl);
+extern int __must_check compel_rpc_call_sync(unsigned int cmd, struct parasite_ctl *ctl);
extern int compel_rpc_sock(struct parasite_ctl *ctl);
-#define PARASITE_USER_CMDS 64
-
+#define PARASITE_USER_CMDS 64
#endif
diff --git a/compel/include/uapi/infect-util.h b/compel/include/uapi/infect-util.h
index 7307ba57a..658df9393 100644
--- a/compel/include/uapi/infect-util.h
+++ b/compel/include/uapi/infect-util.h
@@ -1,6 +1,24 @@
#ifndef __COMPEL_INFECT_UTIL_H__
#define __COMPEL_INFECT_UTIL_H__
+
+#include "common/compiler.h"
+
+/**
+ * The length of the hash is based on what libuuid provides.
+ * According to the manpage this is:
+ *
+ * The uuid_unparse() function converts the supplied UUID uu from the binary
+ * representation into a 36-byte string (plus trailing '\0')
+ */
+#define RUN_ID_HASH_LENGTH 37
+
+/*
+ * compel_run_id is a unique value of the current run. It can be used to
+ * generate resource ID-s to avoid conflicts with other processes.
+ */
+extern char compel_run_id[RUN_ID_HASH_LENGTH];
+
struct parasite_ctl;
-extern int compel_util_send_fd(struct parasite_ctl *ctl, int fd);
+extern int __must_check compel_util_send_fd(struct parasite_ctl *ctl, int fd);
extern int compel_util_recv_fd(struct parasite_ctl *ctl, int *pfd);
#endif
diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h
index 08beaffcd..d21c261b7 100644
--- a/compel/include/uapi/infect.h
+++ b/compel/include/uapi/infect.h
@@ -11,62 +11,74 @@
#include "common/compiler.h"
-#define PARASITE_START_AREA_MIN (4096)
+#define PARASITE_START_AREA_MIN (4096)
-extern int compel_interrupt_task(int pid);
+#define PARASITE_STACK_SIZE (16 << 10)
+/*
+ * A stack redzone is a small, protected region of memory located immediately
+ * after a parasite stack. It is intended to remain unchanged. While it can be
+ * implemented as a guard page, we want to avoid the overhead of additional
+ * remote system calls.
+ */
+#define PARASITE_STACK_REDZONE 128
+
+extern int __must_check compel_interrupt_task(int pid);
struct seize_task_status {
- unsigned long long sigpnd;
- unsigned long long shdpnd;
- char state;
- int ppid;
- int seccomp_mode;
+ unsigned long long sigpnd;
+ unsigned long long shdpnd;
+ unsigned long long sigblk;
+ char state;
+ int vpid;
+ int ppid;
+ int seccomp_mode;
};
-extern int compel_wait_task(int pid, int ppid,
- int (*get_status)(int pid, struct seize_task_status *, void *data),
- void (*free_status)(int pid, struct seize_task_status *, void *data),
- struct seize_task_status *st, void *data);
+extern int __must_check compel_wait_task(int pid, int ppid,
+ int (*get_status)(int pid, struct seize_task_status *, void *data),
+ void (*free_status)(int pid, struct seize_task_status *, void *data),
+ struct seize_task_status *st, void *data);
-extern int compel_stop_task(int pid);
+extern int __must_check compel_stop_task(int pid);
+extern int __must_check compel_parse_stop_signo(int pid);
extern int compel_resume_task(pid_t pid, int orig_state, int state);
+extern int compel_resume_task_sig(pid_t pid, int orig_state, int state, int stop_signo);
struct parasite_ctl;
struct parasite_thread_ctl;
-extern struct parasite_ctl *compel_prepare(int pid);
-extern struct parasite_ctl *compel_prepare_noctx(int pid);
-extern int compel_infect(struct parasite_ctl *ctl, unsigned long nr_threads, unsigned long args_size);
-extern struct parasite_thread_ctl *compel_prepare_thread(struct parasite_ctl *ctl, int pid);
+extern struct parasite_ctl __must_check *compel_prepare(int pid);
+extern struct parasite_ctl __must_check *compel_prepare_noctx(int pid);
+extern int __must_check compel_infect(struct parasite_ctl *ctl, unsigned long nr_threads, unsigned long args_size);
+extern int __must_check compel_infect_no_daemon(struct parasite_ctl *ctl, unsigned long nr_threads,
+ unsigned long args_size);
+extern struct parasite_thread_ctl __must_check *compel_prepare_thread(struct parasite_ctl *ctl, int pid);
extern void compel_release_thread(struct parasite_thread_ctl *);
-extern int compel_stop_daemon(struct parasite_ctl *ctl);
-extern int compel_cure_remote(struct parasite_ctl *ctl);
-extern int compel_cure_local(struct parasite_ctl *ctl);
-extern int compel_cure(struct parasite_ctl *ctl);
+extern int __must_check compel_start_daemon(struct parasite_ctl *ctl);
+extern int __must_check compel_stop_daemon(struct parasite_ctl *ctl);
+extern int __must_check compel_cure_remote(struct parasite_ctl *ctl);
+extern int __must_check compel_cure_local(struct parasite_ctl *ctl);
+extern int __must_check compel_cure(struct parasite_ctl *ctl);
-#define PARASITE_ARG_SIZE_MIN ( 1 << 12)
+#define PARASITE_ARG_SIZE_MIN (1 << 12)
-#define compel_parasite_args(ctl, type) \
- ({ \
- void *___ret; \
- BUILD_BUG_ON(sizeof(type) > PARASITE_ARG_SIZE_MIN); \
- ___ret = compel_parasite_args_p(ctl); \
- ___ret; \
+#define compel_parasite_args(ctl, type) \
+ ({ \
+ void *___ret; \
+ BUILD_BUG_ON(sizeof(type) > PARASITE_ARG_SIZE_MIN); \
+ ___ret = compel_parasite_args_p(ctl); \
+ ___ret; \
})
extern void *compel_parasite_args_p(struct parasite_ctl *ctl);
extern void *compel_parasite_args_s(struct parasite_ctl *ctl, unsigned long args_size);
-extern int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret,
- unsigned long arg1,
- unsigned long arg2,
- unsigned long arg3,
- unsigned long arg4,
- unsigned long arg5,
- unsigned long arg6);
-extern int compel_run_in_thread(struct parasite_thread_ctl *tctl, unsigned int cmd);
-extern int compel_run_at(struct parasite_ctl *ctl, unsigned long ip, user_regs_struct_t *ret_regs);
+extern int __must_check compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, unsigned long arg1,
+ unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5,
+ unsigned long arg6);
+extern int __must_check compel_run_in_thread(struct parasite_thread_ctl *tctl, unsigned int cmd);
+extern int __must_check compel_run_at(struct parasite_ctl *ctl, unsigned long ip, user_regs_struct_t *ret_regs);
/*
* The PTRACE_SYSCALL will trap task twice -- on
@@ -80,12 +92,11 @@ enum trace_flags {
TRACE_EXIT,
};
-extern int compel_stop_on_syscall(int tasks, int sys_nr,
- int sys_nr_compat, enum trace_flags trace);
+extern int __must_check compel_stop_on_syscall(int tasks, int sys_nr, int sys_nr_compat);
-extern int compel_stop_pie(pid_t pid, void *addr, enum trace_flags *tf, bool no_bp);
+extern int __must_check compel_stop_pie(pid_t pid, void *addr, bool no_bp);
-extern int compel_unmap(struct parasite_ctl *ctl, unsigned long addr);
+extern int __must_check compel_unmap(struct parasite_ctl *ctl, unsigned long addr);
extern int compel_mode_native(struct parasite_ctl *ctl);
@@ -94,78 +105,116 @@ extern k_rtsigset_t *compel_thread_sigmask(struct parasite_thread_ctl *tctl);
struct rt_sigframe;
-typedef int (*open_proc_fn)(int pid, int mode, const char *fmt, ...)
- __attribute__ ((__format__ (__printf__, 3, 4)));
-typedef int (*save_regs_t)(void *, user_regs_struct_t *, user_fpregs_struct_t *);
+typedef int (*open_proc_fn)(int pid, int mode, const char *fmt, ...) __attribute__((__format__(__printf__, 3, 4)));
+typedef int (*save_regs_t)(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *);
typedef int (*make_sigframe_t)(void *, struct rt_sigframe *, struct rt_sigframe *, k_rtsigset_t *);
struct infect_ctx {
- int sock;
+ int sock;
/*
* Regs manipulation context.
*/
- save_regs_t save_regs;
- make_sigframe_t make_sigframe;
+ save_regs_t save_regs;
+ make_sigframe_t make_sigframe;
void *regs_arg;
- unsigned long task_size;
- unsigned long syscall_ip; /* entry point of infection */
- unsigned long flags; /* fine-tune (e.g. faults) */
+ unsigned long task_size;
+ unsigned long syscall_ip; /* entry point of infection */
+ unsigned long flags; /* fine-tune (e.g. faults) */
- void (*child_handler)(int, siginfo_t *, void *); /* hander for SIGCHLD deaths */
- struct sigaction orig_handler;
+ void (*child_handler)(int, siginfo_t *, void *); /* handler for SIGCHLD deaths */
+ struct sigaction orig_handler;
open_proc_fn open_proc;
- int log_fd; /* fd for parasite code to send messages to */
+ int log_fd; /* fd for parasite code to send messages to */
+ unsigned long remote_map_addr; /* User-specified address where to mmap parasitic code, default not set */
};
extern struct infect_ctx *compel_infect_ctx(struct parasite_ctl *);
/* Don't use memfd() */
-#define INFECT_NO_MEMFD (1UL << 0)
+#define INFECT_NO_MEMFD (1UL << 0)
/* Make parasite connect() fail */
-#define INFECT_FAIL_CONNECT (1UL << 1)
+#define INFECT_FAIL_CONNECT (1UL << 1)
/* No breakpoints in pie tracking */
-#define INFECT_NO_BREAKPOINTS (1UL << 2)
+#define INFECT_NO_BREAKPOINTS (1UL << 2)
/* Can run parasite inside compat tasks */
-#define INFECT_COMPATIBLE (1UL << 3)
+#define INFECT_COMPATIBLE (1UL << 3)
/* Workaround for ptrace bug on Skylake CPUs with kernels older than v4.14 */
-#define INFECT_X86_PTRACE_MXCSR_BUG (1UL << 4)
+#define INFECT_X86_PTRACE_MXCSR_BUG (1UL << 4)
+/* After infecting - corrupt extended registers (fault-injection) */
+#define INFECT_CORRUPT_EXTREGS (1UL << 5)
/*
* There are several ways to describe a blob to compel
* library. The simplest one derived from criu is to
* provide it from .h files.
*/
-#define COMPEL_BLOB_CHEADER 0x1
+#define COMPEL_BLOB_CHEADER 0x1
struct parasite_blob_desc {
- unsigned parasite_type;
+ unsigned parasite_type;
union {
struct {
- const void *mem;
- size_t bsize;
- size_t nr_gotpcrel;
- unsigned long parasite_ip_off;
- unsigned long addr_cmd_off;
- unsigned long addr_arg_off;
- compel_reloc_t *relocs;
- unsigned int nr_relocs;
+ const void *mem;
+ size_t bsize;
+ unsigned long parasite_ip_off;
+ unsigned long cmd_off;
+ unsigned long args_ptr_off;
+ unsigned long got_off;
+ unsigned long args_off;
+ unsigned long data_off;
+ compel_reloc_t *relocs;
+ unsigned int nr_relocs;
} hdr;
};
};
extern struct parasite_blob_desc *compel_parasite_blob_desc(struct parasite_ctl *);
-extern int compel_get_thread_regs(struct parasite_thread_ctl *, save_regs_t, void *);
+extern int __must_check compel_get_thread_regs(struct parasite_thread_ctl *, save_regs_t, void *);
-extern void compel_relocs_apply(void *mem, void *vbase, size_t size, compel_reloc_t *elf_relocs, size_t nr_relocs);
+extern void compel_relocs_apply(void *mem, void *vbase, struct parasite_blob_desc *pbd);
+extern void compel_relocs_apply_mips(void *mem, void *vbase, struct parasite_blob_desc *pbd);
extern unsigned long compel_task_size(void);
extern uint64_t compel_get_leader_sp(struct parasite_ctl *ctl);
extern uint64_t compel_get_thread_sp(struct parasite_thread_ctl *tctl);
+extern uint64_t compel_get_leader_ip(struct parasite_ctl *ctl);
+extern uint64_t compel_get_thread_ip(struct parasite_thread_ctl *tctl);
+
+void compel_set_leader_ip(struct parasite_ctl *ctl, uint64_t v);
+void compel_set_thread_ip(struct parasite_thread_ctl *tctl, uint64_t v);
+
+extern void compel_get_stack(struct parasite_ctl *ctl, void **rstack, void **r_thread_stack);
+
+#ifndef compel_host_supports_gcs /* fallback when no earlier header defined it */
+static inline bool compel_host_supports_gcs(void)
+{
+ return false;
+}
+#define compel_host_supports_gcs compel_host_supports_gcs /* self-referential: calls still reach the function */
+#endif
+
+#ifndef compel_shstk_enabled /* fallback when no earlier header defined it */
+static inline bool compel_shstk_enabled(user_fpregs_struct_t *ext_regs)
+{
+ return false;
+}
+#define compel_shstk_enabled compel_shstk_enabled /* self-referential: an empty macro would turn f(x) into (x) */
+#endif
+
+#ifndef parasite_setup_shstk
+static inline int parasite_setup_shstk(struct parasite_ctl *ctl,
+ user_fpregs_struct_t *ext_regs)
+{
+ return 0;
+}
+#define parasite_setup_shstk parasite_setup_shstk
+#endif
+
#endif
diff --git a/compel/include/uapi/ksigset.h b/compel/include/uapi/ksigset.h
index f6b124bf3..e2465041d 100644
--- a/compel/include/uapi/ksigset.h
+++ b/compel/include/uapi/ksigset.h
@@ -22,4 +22,10 @@ static inline void ksigaddset(k_rtsigset_t *set, int _sig)
int sig = _sig - 1;
set->sig[sig / _NSIG_BPW] |= 1UL << (sig % _NSIG_BPW);
}
+
+static inline void ksigdelset(k_rtsigset_t *set, int _sig)
+{
+ int sig = _sig - 1;
+ set->sig[sig / _NSIG_BPW] &= ~(1UL << (sig % _NSIG_BPW));
+}
#endif
diff --git a/compel/include/uapi/loglevels.h b/compel/include/uapi/loglevels.h
index 7bf88475d..7a49825d2 100644
--- a/compel/include/uapi/loglevels.h
+++ b/compel/include/uapi/loglevels.h
@@ -6,15 +6,14 @@
* also by log functions in the std plugin.
*/
-enum __compel_log_levels
-{
- COMPEL_LOG_MSG, /* Print message regardless of log level */
- COMPEL_LOG_ERROR, /* Errors only, when we're in trouble */
- COMPEL_LOG_WARN, /* Warnings */
- COMPEL_LOG_INFO, /* Informative, everything is fine */
- COMPEL_LOG_DEBUG, /* Debug only */
+enum __compel_log_levels {
+ COMPEL_LOG_MSG, /* Print message regardless of log level */
+ COMPEL_LOG_ERROR, /* Errors only, when we're in trouble */
+ COMPEL_LOG_WARN, /* Warnings */
+ COMPEL_LOG_INFO, /* Informative, everything is fine */
+ COMPEL_LOG_DEBUG, /* Debug only */
- COMPEL_DEFAULT_LOGLEVEL = COMPEL_LOG_WARN
+ COMPEL_DEFAULT_LOGLEVEL = COMPEL_LOG_WARN
};
#endif /* UAPI_COMPEL_LOGLEVELS_H__ */
diff --git a/compel/include/uapi/plugins.h b/compel/include/uapi/plugins.h
index e9ebfb67f..658c95fc9 100644
--- a/compel/include/uapi/plugins.h
+++ b/compel/include/uapi/plugins.h
@@ -1,33 +1,31 @@
#ifndef UAPI_COMPEL_PLUGIN_H__
#define UAPI_COMPEL_PLUGIN_H__
-#define __init __attribute__((__used__)) __attribute__ ((__section__(".compel.init")))
-#define __exit __attribute__((__used__)) __attribute__ ((__section__(".compel.exit")))
+#define __init __attribute__((__used__)) __attribute__((__section__(".compel.init")))
+#define __exit __attribute__((__used__)) __attribute__((__section__(".compel.exit")))
#ifndef __ASSEMBLY__
typedef struct {
- const char *name;
- int (*init)(void);
- void (*exit)(void);
+ const char *name;
+ int (*init)(void);
+ void (*exit)(void);
} plugin_init_t;
-#define plugin_register(___desc) \
- static const plugin_init_t * const \
- ___ptr__##___desc __init = &___desc;
+#define plugin_register(___desc) static const plugin_init_t *const ___ptr__##___desc __init = &___desc;
-#define PLUGIN_REGISTER(___id, ___name, ___init, ___exit) \
- static const plugin_init_t __plugin_desc_##___id = { \
- .name = ___name, \
- .init = ___init, \
- .exit = ___exit, \
- }; \
+#define PLUGIN_REGISTER(___id, ___name, ___init, ___exit) \
+ static const plugin_init_t __plugin_desc_##___id = { \
+ .name = ___name, \
+ .init = ___init, \
+ .exit = ___exit, \
+ }; \
plugin_register(__plugin_desc_##___id);
-#define PLUGIN_REGISTER_DUMMY(___id) \
- static const plugin_init_t __plugin_desc_##___id = { \
- .name = #___id, \
- }; \
+#define PLUGIN_REGISTER_DUMMY(___id) \
+ static const plugin_init_t __plugin_desc_##___id = { \
+ .name = #___id, \
+ }; \
plugin_register(__plugin_desc_##___id);
#endif /* __ASSEMBLY__ */
diff --git a/compel/include/uapi/ptrace.h b/compel/include/uapi/ptrace.h
index 4df00b6e1..558124fbd 100644
--- a/compel/include/uapi/ptrace.h
+++ b/compel/include/uapi/ptrace.h
@@ -1,6 +1,7 @@
#ifndef UAPI_COMPEL_PTRACE_H__
#define UAPI_COMPEL_PTRACE_H__
+#include "common/compiler.h"
/*
* We'd want to include both sys/ptrace.h and linux/ptrace.h,
* hoping that most definitions come from either one or another.
@@ -18,40 +19,40 @@
*/
#ifndef PTRACE_SEIZE
-# define PTRACE_SEIZE 0x4206
+#define PTRACE_SEIZE 0x4206
#endif
#ifndef PTRACE_O_SUSPEND_SECCOMP
-# define PTRACE_O_SUSPEND_SECCOMP (1 << 21)
+#define PTRACE_O_SUSPEND_SECCOMP (1 << 21)
#endif
#ifndef PTRACE_INTERRUPT
-# define PTRACE_INTERRUPT 0x4207
+#define PTRACE_INTERRUPT 0x4207
#endif
#ifndef PTRACE_PEEKSIGINFO
-#define PTRACE_PEEKSIGINFO 0x4209
+#define PTRACE_PEEKSIGINFO 0x4209
/* Read signals from a shared (process wide) queue */
-#define PTRACE_PEEKSIGINFO_SHARED (1 << 0)
+#define PTRACE_PEEKSIGINFO_SHARED (1 << 0)
#endif
#ifndef PTRACE_GETREGSET
-# define PTRACE_GETREGSET 0x4204
-# define PTRACE_SETREGSET 0x4205
+#define PTRACE_GETREGSET 0x4204
+#define PTRACE_SETREGSET 0x4205
#endif
#ifndef PTRACE_GETSIGMASK
-# define PTRACE_GETSIGMASK 0x420a
-# define PTRACE_SETSIGMASK 0x420b
+#define PTRACE_GETSIGMASK 0x420a
+#define PTRACE_SETSIGMASK 0x420b
#endif
#ifndef PTRACE_SECCOMP_GET_FILTER
-#define PTRACE_SECCOMP_GET_FILTER 0x420c
+#define PTRACE_SECCOMP_GET_FILTER 0x420c
#endif
#ifndef PTRACE_SECCOMP_GET_METADATA
-# define PTRACE_SECCOMP_GET_METADATA 0x420d
+#define PTRACE_SECCOMP_GET_METADATA 0x420d
#endif /* PTRACE_SECCOMP_GET_METADATA */
/*
@@ -60,23 +61,48 @@
* own identical definition for a while.
*/
typedef struct {
- uint64_t filter_off; /* Input: which filter */
- uint64_t flags; /* Output: filter's flags */
+ uint64_t filter_off; /* Input: which filter */
+ uint64_t flags; /* Output: filter's flags */
} seccomp_metadata_t;
+#ifndef PTRACE_GET_RSEQ_CONFIGURATION
+#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
+
+struct __ptrace_rseq_configuration {
+ uint64_t rseq_abi_pointer;
+ uint32_t rseq_abi_size;
+ uint32_t signature;
+ uint32_t flags;
+ uint32_t pad;
+};
+#endif
+
#ifdef PTRACE_EVENT_STOP
-# if PTRACE_EVENT_STOP == 7 /* Bad value from Linux 3.1-3.3, fixed in 3.4 */
-# undef PTRACE_EVENT_STOP
-# endif
+#if PTRACE_EVENT_STOP == 7 /* Bad value from Linux 3.1-3.3, fixed in 3.4 */
+#undef PTRACE_EVENT_STOP
+#endif
#endif
#ifndef PTRACE_EVENT_STOP
-# define PTRACE_EVENT_STOP 128
+#define PTRACE_EVENT_STOP 128
+#endif
+
+/*
+ * Amazon Linux 2 uses glibc 2.26. PTRACE_ARCH_PRCTL was added in glibc 2.27.
+ * This allows CRIU to build on Amazon Linux 2.
+ *
+ * Note that in sys/ptrace.h, PTRACE_ARCH_PRCTL is an enum value so the
+ * preprocessor doesn't know about it. PT_ARCH_PRCTL is the preprocessor symbol
+ * that matches the value of PTRACE_ARCH_PRCTL. So look for PT_ARCH_PRCTL to
+ * decide if PTRACE_ARCH_PRCTL is available or not.
+ */
+#if defined(__x86_64__) && !defined(PT_ARCH_PRCTL)
+#define PTRACE_ARCH_PRCTL 30 /* From asm/ptrace-abi.h. */
#endif
extern int ptrace_suspend_seccomp(pid_t pid);
-extern int ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes);
-extern int ptrace_poke_area(pid_t pid, void *src, void *addr, long bytes);
-extern int ptrace_swap_area(pid_t pid, void *dst, void *src, long bytes);
+extern int __must_check ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes);
+extern int __must_check ptrace_poke_area(pid_t pid, void *src, void *addr, long bytes);
+extern int __must_check ptrace_swap_area(pid_t pid, void *dst, void *src, long bytes);
#endif /* UAPI_COMPEL_PTRACE_H__ */
diff --git a/compel/include/uapi/sigframe-common.h b/compel/include/uapi/sigframe-common.h
index fc93c5480..3062d1d38 100644
--- a/compel/include/uapi/sigframe-common.h
+++ b/compel/include/uapi/sigframe-common.h
@@ -5,58 +5,57 @@
#define UAPI_COMPEL_SIGFRAME_COMMON_H__
#ifndef UAPI_COMPEL_ASM_SIGFRAME_H__
-# error "Direct inclusion is forbidden, use