Revert "plugins/amdgpu: Implement parallel restore"

This functionality (#2527) is being reverted and excluded from this
release due to issue #2812.

It will be included in a subsequent release once all associated issues
are resolved.

Signed-off-by: Andrei Vagin <avagin@google.com>
Andrei Vagin 2025-11-08 15:57:22 +00:00
parent 1d08ff8ca7
commit ce680fc6c7
8 changed files with 52 additions and 771 deletions


@@ -27,7 +27,7 @@ endif
criu-amdgpu.pb-c.c: criu-amdgpu.proto
protoc --proto_path=. --c_out=. criu-amdgpu.proto
amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_drm.c amdgpu_plugin_topology.c amdgpu_plugin_util.c criu-amdgpu.pb-c.c amdgpu_socket_utils.c
amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_drm.c amdgpu_plugin_topology.c amdgpu_plugin_util.c criu-amdgpu.pb-c.c
$(CC) $(PLUGIN_CFLAGS) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) $(LIBDRM_INC)
amdgpu_plugin_clean:


@@ -3,8 +3,7 @@ Supporting ROCm with CRIU
_Felix Kuehling <Felix.Kuehling@amd.com>_<br>
_Rajneesh Bardwaj <Rajneesh.Bhardwaj@amd.com>_<br>
_David Yat Sin <David.YatSin@amd.com>_<br>
_Yanning Yang <yangyanning@sjtu.edu.cn>_
_David Yat Sin <David.YatSin@amd.com>_
# Introduction
@@ -225,26 +224,6 @@ to resume execution on the GPUs.
*This new plugin is enabled by the new hook `__RESUME_DEVICES_LATE` in our RFC
patch series.*
## Restoring BO content in parallel
Restoring BO content is an important part of restoring GPU state and usually
takes a significant amount of time. A natural place for this work is the
`cr_plugin_restore_file` hook. However, restoring there blocks the target
process from performing other restore operations, which limits how much the
restore process can be optimized.
Therefore, a new plugin hook that runs in the master restore process is
introduced; it cooperates with the `cr_plugin_restore_file` hook to restore the
BO content. The target process only needs to send the relevant BOs to the
master restore process, and the new hook performs the actual buffer-object
restore. This way, the target process can carry out other restore operations
while the BO content is being restored, which speeds up the overall restore.
This is an implementation of the gCROP method proposed in the ACM SoCC'24
paper: [On-demand and Parallel
Checkpoint/Restore for GPU Applications](https://dl.acm.org/doi/10.1145/3698038.3698510).
*This optimization technique is enabled by the `__POST_FORKING` hook.*
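To make the division of labour concrete, here is a minimal sketch of the two
sides (illustrative only, not the plugin's actual implementation; the function
names `hand_off_bos` and `parallel_restore_worker` are invented for this
sketch, while the `*_parallel_restore_cmd` helpers are the ones introduced by
this change):

```c
/* Sketch: the target process packs its BOs into a command inside the
 * cr_plugin_restore_file hook and ships it to the master restore process;
 * a worker started from the __POST_FORKING hook in the master performs the
 * actual BO-content restore. */
#include "amdgpu_socket_utils.h"

/* Target-process side, called while restoring the kfd file. */
static int hand_off_bos(int img_id, int num_bos, int num_gpus)
{
	parallel_restore_cmd cmd;
	int ret;

	ret = init_parallel_restore_cmd(num_bos, img_id, num_gpus, &cmd);
	if (ret)
		return ret;
	/* ...parallel_restore_gpu_id_add()/parallel_restore_bo_add() per device/BO... */
	ret = send_parallel_restore_cmd(&cmd);
	free_parallel_restore_cmd(&cmd);
	return ret;
}

/* Master-process side: background worker spawned by the __POST_FORKING hook. */
static void *parallel_restore_worker(void *arg)
{
	(void)arg;
	for (;;) {
		parallel_restore_cmd cmd = { 0 };
		int ret = recv_parallel_restore_cmd(&cmd);

		if (ret) /* 1 means "quit", negative means error */
			break;
		/* restore every queued BO's content (e.g. via SDMA), then wait for more */
		free_parallel_restore_cmd(&cmd);
	}
	return NULL;
}
```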
## Other CRIU changes
In addition to the new plugins, we need to make some changes to CRIU itself to


@@ -28,13 +28,11 @@
#include "xmalloc.h"
#include "criu-log.h"
#include "files.h"
#include "pstree.h"
#include "common/list.h"
#include "amdgpu_plugin_drm.h"
#include "amdgpu_plugin_util.h"
#include "amdgpu_plugin_topology.h"
#include "amdgpu_socket_utils.h"
#include "img-streamer.h"
#include "image.h"
@@ -66,18 +64,6 @@ bool plugin_added_to_inventory = false;
bool plugin_disabled = false;
/*
 * In the common single-process case, this optimization can noticeably reduce
 * restore latency by restoring BO content in parallel. With multiple processes,
 * state is already restored in parallel across the different processes, so the
 * optimization brings no further improvement and is disabled by default in that
 * case. The parallel_disabled flag controls whether the optimization is enabled.
 */
bool parallel_disabled = false;
pthread_t parallel_thread = 0;
int parallel_thread_result = 0;
/**************************************************************************************************/
/* Call ioctl, restarting if it is interrupted */
@@ -365,15 +351,6 @@ int amdgpu_plugin_init(int stage)
maps_init(&restore_maps);
if (stage == CR_PLUGIN_STAGE__RESTORE) {
if (has_children(root_item)) {
pr_info("Parallel restore disabled\n");
parallel_disabled = true;
} else {
if (install_parallel_sock() < 0) {
pr_err("Failed to install parallel socket\n");
return -1;
}
}
/* Default Values */
kfd_fw_version_check = true;
kfd_sdma_fw_version_check = true;
@@ -1462,9 +1439,14 @@ static int restore_bos(struct kfd_ioctl_criu_args *args, CriuKfd *e)
static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKfd *e)
{
struct thread_data *thread_datas = NULL;
struct thread_data *thread_datas;
int thread_i, ret = 0;
int offset = 0;
thread_datas = xzalloc(sizeof(*thread_datas) * e->num_of_gpus);
if (!thread_datas) {
ret = -ENOMEM;
goto exit;
}
for (int i = 0; i < e->num_of_bos; i++) {
struct kfd_criu_bo_bucket *bo_bucket = &bo_buckets[i];
@@ -1507,101 +1489,56 @@ static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKf
}
}
if (!parallel_disabled) {
parallel_restore_cmd restore_cmd;
pr_info("Begin to send parallel restore cmd\n");
ret = init_parallel_restore_cmd(e->num_of_bos, id, e->num_of_gpus, &restore_cmd);
if (ret)
goto exit_parallel;
thread_i = 0;
for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) {
struct tp_node *dev;
int ret_thread = 0;
uint32_t target_gpu_id;
for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) {
uint32_t target_gpu_id;
struct tp_node *dev;
if (!e->device_entries[i]->gpu_id)
continue;
if (!e->device_entries[i]->gpu_id)
continue;
/* e->device_entries[i]->gpu_id is user_gpu_id, target_gpu_id is actual_gpu_id */
target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id);
target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id);
dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id);
if (!dev) {
pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id);
ret = -ENODEV;
goto exit_parallel;
}
parallel_restore_gpu_id_add(e->device_entries[i]->gpu_id, dev->drm_render_minor, &restore_cmd);
for (int j = 0; j < e->num_of_bos; j++) {
if (bo_buckets[j].gpu_id != e->device_entries[i]->gpu_id)
continue;
if (bo_buckets[j].alloc_flags &
(KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
parallel_restore_bo_add(bo_buckets[j].dmabuf_fd, bo_buckets[j].gpu_id,
bo_buckets[j].size, offset, &restore_cmd);
offset += bo_buckets[j].size;
}
}
}
ret = send_parallel_restore_cmd(&restore_cmd);
exit_parallel:
free_parallel_restore_cmd(&restore_cmd);
} else {
thread_datas = xzalloc(sizeof(*thread_datas) * e->num_of_gpus);
if (!thread_datas) {
ret = -ENOMEM;
/* We need the fd for actual_gpu_id */
dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id);
if (!dev) {
pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id);
ret = -ENODEV;
goto exit;
}
thread_i = 0;
for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) {
struct tp_node *dev;
int ret_thread = 0;
uint32_t target_gpu_id;
thread_datas[thread_i].id = id;
thread_datas[thread_i].gpu_id = e->device_entries[i]->gpu_id;
thread_datas[thread_i].bo_buckets = bo_buckets;
thread_datas[thread_i].bo_entries = e->bo_entries;
thread_datas[thread_i].pid = e->pid;
thread_datas[thread_i].num_of_bos = e->num_of_bos;
if (!e->device_entries[i]->gpu_id)
continue;
/* e->device_entries[i]->gpu_id is user_gpu_id, target_gpu_id is actual_gpu_id */
target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id);
/* We need the fd for actual_gpu_id */
dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id);
if (!dev) {
pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id);
ret = -ENODEV;
goto exit;
}
thread_datas[thread_i].id = id;
thread_datas[thread_i].gpu_id = e->device_entries[i]->gpu_id;
thread_datas[thread_i].bo_buckets = bo_buckets;
thread_datas[thread_i].bo_entries = e->bo_entries;
thread_datas[thread_i].pid = e->pid;
thread_datas[thread_i].num_of_bos = e->num_of_bos;
thread_datas[thread_i].drm_fd = node_get_drm_render_device(dev);
if (thread_datas[thread_i].drm_fd < 0) {
ret = -thread_datas[thread_i].drm_fd;
goto exit;
}
ret_thread = pthread_create(&thread_datas[thread_i].thread, NULL, restore_bo_contents,
(void *)&thread_datas[thread_i]);
if (ret_thread) {
pr_err("Failed to create thread[%i] ret:%d\n", thread_i, ret_thread);
ret = -ret_thread;
goto exit;
}
thread_i++;
thread_datas[thread_i].drm_fd = node_get_drm_render_device(dev);
if (thread_datas[thread_i].drm_fd < 0) {
ret = -thread_datas[thread_i].drm_fd;
goto exit;
}
for (int i = 0; i < e->num_of_gpus; i++) {
pthread_join(thread_datas[i].thread, NULL);
pr_info("Thread[0x%x] finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret);
ret_thread = pthread_create(&thread_datas[thread_i].thread, NULL, restore_bo_contents,
(void *)&thread_datas[thread_i]);
if (ret_thread) {
pr_err("Failed to create thread[%i] ret:%d\n", thread_i, ret_thread);
ret = -ret_thread;
goto exit;
}
thread_i++;
}
if (thread_datas[i].ret) {
ret = thread_datas[i].ret;
goto exit;
}
for (int i = 0; i < e->num_of_gpus; i++) {
pthread_join(thread_datas[i].thread, NULL);
pr_info("Thread[0x%x] finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret);
if (thread_datas[i].ret) {
ret = thread_datas[i].ret;
goto exit;
}
}
exit:
@@ -1609,8 +1546,8 @@ exit:
if (bo_buckets[i].dmabuf_fd != KFD_INVALID_FD)
close(bo_buckets[i].dmabuf_fd);
}
if (thread_datas)
xfree(thread_datas);
xfree(thread_datas);
return ret;
}
@@ -1899,24 +1836,6 @@ int amdgpu_plugin_resume_devices_late(int target_pid)
if (plugin_disabled)
return -ENOTSUP;
if (!parallel_disabled) {
pr_info("Close parallel restore server\n");
if (close_parallel_restore_server()) {
pr_err("Close parallel restore server fail\n");
return -1;
}
exit_code = pthread_join(parallel_thread, NULL);
if (exit_code) {
pr_err("Failed to join parallel thread ret:%d\n", exit_code);
return -1;
}
if (parallel_thread_result) {
pr_err("Parallel restore fail\n");
return parallel_thread_result;
}
}
pr_info("Inside %s for target pid = %d\n", __func__, target_pid);
fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC);
@@ -1943,244 +1862,3 @@ int amdgpu_plugin_resume_devices_late(int target_pid)
}
CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, amdgpu_plugin_resume_devices_late)
int sdma_copy_bo_helper(uint64_t size, int fd, FILE *storage_fp, void *buffer, size_t buffer_size,
amdgpu_device_handle h_dev, uint64_t max_copy_size, enum sdma_op_type type)
{
return sdma_copy_bo((struct kfd_criu_bo_bucket){ 0, size, 0, 0, 0, 0, fd, 0 }, storage_fp, buffer,
buffer_size, h_dev, max_copy_size, SDMA_OP_VRAM_WRITE);
}
int init_dev(int dev_minor, amdgpu_device_handle *h_dev, uint64_t *max_copy_size)
{
int ret = 0;
int drm_fd = -1;
uint32_t major, minor;
struct amdgpu_gpu_info gpu_info = { 0 };
drm_fd = open_drm_render_device(dev_minor);
if (drm_fd < 0) {
return drm_fd;
}
ret = amdgpu_device_initialize(drm_fd, &major, &minor, h_dev);
if (ret) {
pr_perror("Failed to initialize device");
goto err;
}
ret = amdgpu_query_gpu_info(*h_dev, &gpu_info);
if (ret) {
pr_perror("failed to query gpuinfo via libdrm");
goto err;
}
*max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ? SDMA_LINEAR_COPY_MAX_SIZE :
SDMA_LINEAR_COPY_MAX_SIZE - 1;
return 0;
err:
amdgpu_device_deinitialize(*h_dev);
return ret;
}
FILE *get_bo_contents_fp(int id, int gpu_id, size_t tot_size)
{
char img_path[PATH_MAX];
size_t image_size = 0;
FILE *bo_contents_fp = NULL;
snprintf(img_path, sizeof(img_path), IMG_KFD_PAGES_FILE, id, gpu_id);
bo_contents_fp = open_img_file(img_path, false, &image_size);
if (!bo_contents_fp) {
pr_perror("Cannot fopen %s", img_path);
return NULL;
}
if (tot_size != image_size) {
pr_err("%s size mismatch (current:%ld:expected:%ld)\n", img_path, image_size, tot_size);
fclose(bo_contents_fp);
return NULL;
}
return bo_contents_fp;
}
struct parallel_thread_data {
pthread_t thread;
uint32_t gpu_id;
int minor;
parallel_restore_cmd *restore_cmd;
int ret;
};
void *parallel_restore_bo_contents(void *_thread_data)
{
struct parallel_thread_data *thread_data = (struct parallel_thread_data *)_thread_data;
amdgpu_device_handle h_dev;
uint64_t max_copy_size;
size_t total_bo_size = 0, max_bo_size = 0, buffer_size = 0;
FILE *bo_contents_fp = NULL;
parallel_restore_entry *entry;
parallel_restore_cmd *restore_cmd = thread_data->restore_cmd;
int ret = 0;
int offset = 0;
void *buffer = NULL;
ret = init_dev(thread_data->minor, &h_dev, &max_copy_size);
if (ret) {
goto err;
}
for (int i = 0; i < restore_cmd->cmd_head.entry_num; i++) {
if (restore_cmd->entries[i].gpu_id == thread_data->gpu_id) {
total_bo_size += restore_cmd->entries[i].size;
max_bo_size = max(restore_cmd->entries[i].size, max_bo_size);
}
}
buffer_size = kfd_max_buffer_size > 0 ? min(kfd_max_buffer_size, max_bo_size) : max_bo_size;
bo_contents_fp = get_bo_contents_fp(restore_cmd->cmd_head.id, thread_data->gpu_id, total_bo_size);
if (bo_contents_fp == NULL) {
ret = -1;
goto err_sdma;
}
offset = ftell(bo_contents_fp);
posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), buffer_size);
if (!buffer) {
pr_perror("Failed to alloc aligned memory. Consider setting KFD_MAX_BUFFER_SIZE.");
ret = -ENOMEM;
goto err_sdma;
}
for (int i = 0; i < restore_cmd->cmd_head.entry_num; i++) {
if (restore_cmd->entries[i].gpu_id != thread_data->gpu_id)
continue;
entry = &restore_cmd->entries[i];
fseek(bo_contents_fp, entry->read_offset + offset, SEEK_SET);
ret = sdma_copy_bo_helper(entry->size, restore_cmd->fds_write[entry->write_id], bo_contents_fp, buffer,
buffer_size, h_dev, max_copy_size, SDMA_OP_VRAM_WRITE);
if (ret) {
pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i);
goto err_sdma;
}
}
err_sdma:
if (bo_contents_fp)
fclose(bo_contents_fp);
if (buffer)
xfree(buffer);
amdgpu_device_deinitialize(h_dev);
err:
thread_data->ret = ret;
return NULL;
}
void *restore_device_parallel_worker(void *arg)
{
while (1) {
parallel_restore_cmd restore_cmd = { 0 };
struct parallel_thread_data *thread_datas = NULL;
int ret;
int error_occurred = 0, join_ret = 0, created_threads = 0;
ret = recv_parallel_restore_cmd(&restore_cmd);
if (ret) {
if (ret == 1) {
*(int *)arg = 0;
goto exit;
}
goto err;
}
thread_datas = xzalloc(sizeof(*thread_datas) * restore_cmd.cmd_head.gpu_num);
if (!thread_datas) {
ret = -ENOMEM;
goto err;
}
for (; created_threads < restore_cmd.cmd_head.gpu_num; created_threads++) {
thread_datas[created_threads].gpu_id = restore_cmd.gpu_ids[created_threads].gpu_id;
thread_datas[created_threads].minor = restore_cmd.gpu_ids[created_threads].minor;
thread_datas[created_threads].restore_cmd = &restore_cmd;
ret = pthread_create(&thread_datas[created_threads].thread, NULL, parallel_restore_bo_contents,
(void *)&thread_datas[created_threads]);
if (ret) {
pr_err("Failed to create thread[0x%x] ret:%d\n", thread_datas[created_threads].gpu_id, ret);
error_occurred = 1;
break;
}
}
for (int i = 0; i < created_threads; i++) {
join_ret = pthread_join(thread_datas[i].thread, NULL);
if (join_ret != 0) {
pr_err("pthread_join failed for Thread[0x%x] ret:%d\n",
thread_datas[i].gpu_id, join_ret);
if (!error_occurred) {
ret = join_ret;
error_occurred = 1;
}
}
pr_info("Thread[0x%x] finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret);
/* Check thread return value */
if (thread_datas[i].ret && !error_occurred) {
ret = thread_datas[i].ret;
error_occurred = 1;
}
}
if (thread_datas)
xfree(thread_datas);
err:
free_parallel_restore_cmd(&restore_cmd);
if (ret) {
*(int *)arg = ret;
return NULL;
}
}
exit:
return NULL;
}
/*
* While the background thread is running, some processing functions (e.g., stop_cgroupd)
* in the main thread need to block SIGCHLD. To prevent interference from this background
* thread, SIGCHLD is blocked in this thread.
*/
static int back_thread_create(pthread_t *newthread, void *(*f)(void *), void *arg)
{
int ret = 0;
sigset_t blockmask, oldmask;
sigemptyset(&blockmask);
sigaddset(&blockmask, SIGCHLD);
sigprocmask(SIG_BLOCK, &blockmask, &oldmask);
ret = pthread_create(newthread, NULL, f, arg);
if (ret) {
pr_err("Create worker thread fail: %d\n", ret);
return -1;
}
sigprocmask(SIG_SETMASK, &oldmask, NULL);
return 0;
}
int amdgpu_plugin_post_forking(void)
{
if (plugin_disabled)
return -ENOTSUP;
if (parallel_disabled)
return 0;
return back_thread_create(&parallel_thread, restore_device_parallel_worker, &parallel_thread_result);
}
CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__POST_FORKING, amdgpu_plugin_post_forking)


@@ -45,7 +45,7 @@ bool kfd_capability_check = true;
*/
int fd_next = -1;
int open_drm_render_device(int minor)
static int open_drm_render_device(int minor)
{
char path[128];
int fd, ret_fd;


@@ -118,7 +118,6 @@ struct tp_node *sys_get_node_by_gpu_id(const struct tp_system *sys, const uint32
struct tp_node *sys_get_node_by_render_minor(const struct tp_system *sys, const int drm_render_minor);
struct tp_node *sys_get_node_by_index(const struct tp_system *sys, uint32_t index);
int open_drm_render_device(int minor);
int node_get_drm_render_device(struct tp_node *node);
void sys_close_drm_render_devices(struct tp_system *sys);


@@ -1,320 +0,0 @@
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>
#include "amdgpu_socket_utils.h"
#include "criu-log.h"
#include "common/scm.h"
#include "fdstore.h"
#include "util-pie.h"
#include "util.h"
int parallel_socket_addr_len;
struct sockaddr_un parallel_socket_addr;
int parallel_socket_id = 0;
static void amdgpu_socket_name_gen(struct sockaddr_un *addr, int *len)
{
addr->sun_family = AF_UNIX;
snprintf(addr->sun_path, UNIX_PATH_MAX, "x/criu-amdgpu-parallel-%s", criu_run_id);
*len = SUN_LEN(addr);
*addr->sun_path = '\0';
}
int install_parallel_sock(void)
{
int ret = 0;
int sock_fd;
sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
if (sock_fd < 0) {
pr_perror("socket creation failed");
return -1;
}
amdgpu_socket_name_gen(&parallel_socket_addr, &parallel_socket_addr_len);
ret = bind(sock_fd, (struct sockaddr *)&parallel_socket_addr, parallel_socket_addr_len);
if (ret < 0) {
pr_perror("bind failed");
goto err;
}
ret = listen(sock_fd, SOMAXCONN);
if (ret < 0) {
pr_perror("listen failed");
goto err;
}
parallel_socket_id = fdstore_add(sock_fd);
if (parallel_socket_id < 0) {
ret = -1;
goto err;
}
err:
close(sock_fd);
return ret;
}
void parallel_restore_bo_add(int dmabuf_fd, int gpu_id, uint64_t size, uint64_t offset,
parallel_restore_cmd *restore_cmd)
{
parallel_restore_entry *restore_entry = &restore_cmd->entries[restore_cmd->cmd_head.entry_num];
restore_entry->gpu_id = gpu_id;
restore_entry->write_id = restore_cmd->cmd_head.fd_write_num;
restore_entry->write_offset = 0;
restore_entry->read_offset = offset;
restore_entry->size = size;
restore_cmd->fds_write[restore_cmd->cmd_head.fd_write_num] = dmabuf_fd;
restore_cmd->cmd_head.entry_num += 1;
restore_cmd->cmd_head.fd_write_num += 1;
}
void parallel_restore_gpu_id_add(int gpu_id, int minor, parallel_restore_cmd *restore_cmd)
{
restore_cmd->gpu_ids[restore_cmd->cmd_head.gpu_num] = (parallel_gpu_info){ gpu_id, minor };
restore_cmd->cmd_head.gpu_num += 1;
}
static int send_metadata(int sock_fd, parallel_restore_cmd *restore_cmd)
{
if (send(sock_fd, &restore_cmd->cmd_head, sizeof(parallel_restore_cmd_head), 0) < 0) {
pr_perror("Send parallel restore command head fail");
return -1;
}
return 0;
}
static int send_gpu_ids(int sock_fd, parallel_restore_cmd *restore_cmd)
{
if (send(sock_fd, restore_cmd->gpu_ids, restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info), 0) < 0) {
pr_perror("Send GPU ids of parallel restore command fail");
return -1;
}
return 0;
}
static int send_cmds(int sock_fd, parallel_restore_cmd *restore_cmd)
{
if (send(sock_fd, restore_cmd->entries, restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry), 0) < 0) {
pr_perror("Send parallel restore command fail");
return -1;
}
return 0;
}
static int send_dmabuf_fds(int sock_fd, parallel_restore_cmd *restore_cmd)
{
if (send_fds(sock_fd, NULL, 0, restore_cmd->fds_write, restore_cmd->cmd_head.fd_write_num, 0, 0) < 0) {
pr_perror("Send dmabuf fds fail");
return -1;
}
return 0;
}
int send_parallel_restore_cmd(parallel_restore_cmd *restore_cmd)
{
int sock_fd;
int ret = 0;
sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
if (sock_fd < 0) {
pr_perror("Socket creation failed");
return -1;
}
ret = connect(sock_fd, (struct sockaddr *)&parallel_socket_addr, parallel_socket_addr_len);
if (ret < 0) {
pr_perror("Connect failed");
goto err;
}
ret = send_metadata(sock_fd, restore_cmd);
if (ret) {
goto err;
}
ret = send_gpu_ids(sock_fd, restore_cmd);
if (ret) {
goto err;
}
ret = send_cmds(sock_fd, restore_cmd);
if (ret) {
goto err;
}
ret = send_dmabuf_fds(sock_fd, restore_cmd);
err:
close(sock_fd);
return ret;
}
int init_parallel_restore_cmd(int num, int id, int gpu_num, parallel_restore_cmd *restore_cmd)
{
restore_cmd->cmd_head.id = id;
restore_cmd->cmd_head.fd_write_num = 0;
restore_cmd->cmd_head.entry_num = 0;
restore_cmd->cmd_head.gpu_num = 0;
restore_cmd->gpu_ids = xzalloc(gpu_num * sizeof(parallel_gpu_info));
if (!restore_cmd->gpu_ids)
return -ENOMEM;
restore_cmd->fds_write = xzalloc(num * sizeof(int));
if (!restore_cmd->fds_write)
return -ENOMEM;
restore_cmd->entries = xzalloc(num * sizeof(parallel_restore_entry));
if (!restore_cmd->entries)
return -ENOMEM;
return 0;
}
void free_parallel_restore_cmd(parallel_restore_cmd *restore_cmd)
{
if (restore_cmd->gpu_ids)
xfree(restore_cmd->gpu_ids);
if (restore_cmd->fds_write)
xfree(restore_cmd->fds_write);
if (restore_cmd->entries)
xfree(restore_cmd->entries);
}
static int init_parallel_restore_cmd_by_head(parallel_restore_cmd *restore_cmd)
{
restore_cmd->gpu_ids = xzalloc(restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info));
if (!restore_cmd->gpu_ids)
return -ENOMEM;
restore_cmd->fds_write = xzalloc(restore_cmd->cmd_head.fd_write_num * sizeof(int));
if (!restore_cmd->fds_write)
return -ENOMEM;
restore_cmd->entries = xzalloc(restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry));
if (!restore_cmd->entries)
return -ENOMEM;
return 0;
}
static int check_quit_cmd(parallel_restore_cmd *restore_cmd)
{
return restore_cmd->cmd_head.fd_write_num == 0;
}
static int recv_metadata(int client_fd, parallel_restore_cmd *restore_cmd)
{
if (recv(client_fd, &restore_cmd->cmd_head, sizeof(parallel_restore_cmd_head), 0) < 0) {
pr_perror("Recv parallel restore command head fail");
return -1;
}
return 0;
}
static int recv_cmds(int client_fd, parallel_restore_cmd *restore_cmd)
{
if (recv(client_fd, restore_cmd->entries, restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry), 0) < 0) {
pr_perror("Recv parallel restore command fail");
return -1;
}
return 0;
}
static int recv_gpu_ids(int sock_fd, parallel_restore_cmd *restore_cmd)
{
if (recv(sock_fd, restore_cmd->gpu_ids, restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info), 0) < 0) {
pr_perror("Send GPU ids of parallel restore command fail");
return -1;
}
return 0;
}
static int recv_dmabuf_fds(int client_fd, parallel_restore_cmd *restore_cmd)
{
if (recv_fds(client_fd, restore_cmd->fds_write, restore_cmd->cmd_head.fd_write_num, 0, 0) < 0) {
pr_perror("Recv dmabuf fds fail");
return -1;
}
return 0;
}
int recv_parallel_restore_cmd(parallel_restore_cmd *restore_cmd)
{
int sock_fd, client_fd;
int ret = 0;
sock_fd = fdstore_get(parallel_socket_id);
if (sock_fd < 0)
return -1;
client_fd = accept(sock_fd, NULL, NULL);
if (client_fd < 0) {
ret = client_fd;
goto err_accept;
}
ret = recv_metadata(client_fd, restore_cmd);
if (ret) {
goto err;
}
// Return 1 to quit
if (check_quit_cmd(restore_cmd)) {
ret = 1;
goto err;
}
ret = init_parallel_restore_cmd_by_head(restore_cmd);
if (ret) {
goto err;
}
ret = recv_gpu_ids(client_fd, restore_cmd);
if (ret) {
goto err;
}
ret = recv_cmds(client_fd, restore_cmd);
if (ret) {
goto err;
}
ret = recv_dmabuf_fds(client_fd, restore_cmd);
err:
close(client_fd);
err_accept:
close(sock_fd);
return ret;
}
int close_parallel_restore_server(void)
{
int sock_fd;
int ret = 0;
parallel_restore_cmd_head cmd_head;
sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
if (sock_fd < 0) {
pr_perror("Socket creation failed");
return -1;
}
ret = connect(sock_fd, (struct sockaddr *)&parallel_socket_addr, parallel_socket_addr_len);
if (ret < 0) {
pr_perror("Connect failed");
goto err;
}
memset(&cmd_head, 0, sizeof(parallel_restore_cmd_head));
if (send(sock_fd, &cmd_head, sizeof(parallel_restore_cmd_head), 0) < 0) {
pr_perror("Send parallel restore command head fail");
return -1;
}
err:
close(sock_fd);
return ret;
}


@@ -1,54 +0,0 @@
#ifndef __KFD_PLUGIN_AMDGPU_SOCKET_UTILS_H__
#define __KFD_PLUGIN_AMDGPU_SOCKET_UTILS_H__
typedef struct {
int id;
int fd_write_num; /* The number of buffer objects to be restored. */
int entry_num; /* The number of restore commands.*/
int gpu_num;
} parallel_restore_cmd_head;
typedef struct {
int gpu_id;
int minor;
} parallel_gpu_info;
typedef struct {
int gpu_id;
int write_id;
uint64_t read_offset;
uint64_t write_offset;
uint64_t size;
} parallel_restore_entry;
typedef struct {
parallel_restore_cmd_head cmd_head;
int *fds_write;
parallel_gpu_info *gpu_ids;
parallel_restore_entry *entries;
} parallel_restore_cmd;
/*
* For parallel_restore, a background thread in the main CRIU process is used to restore the GPU
* buffer object. However, initially, the ownership of these buffer objects and the metadata for
* restoration are all with the target process. Therefore, we introduce a series of functions to
* help the target process send these tasks to the main CRIU process.
*/
int init_parallel_restore_cmd(int num, int id, int gpu_num, parallel_restore_cmd *restore_cmd);
void free_parallel_restore_cmd(parallel_restore_cmd *restore_cmd);
int install_parallel_sock(void);
int send_parallel_restore_cmd(parallel_restore_cmd *restore_cmd);
int recv_parallel_restore_cmd(parallel_restore_cmd *restore_cmd);
void parallel_restore_bo_add(int dmabuf_fd, int gpu_id, uint64_t size, uint64_t offset,
parallel_restore_cmd *restore_cmd);
void parallel_restore_gpu_id_add(int gpu_id, int minor, parallel_restore_cmd *restore_cmd);
int close_parallel_restore_server(void);
#endif
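
For reference, the helpers declared above are meant to be called in a fixed
order by the target process: `init_parallel_restore_cmd()`, then one
`parallel_restore_gpu_id_add()` per GPU and one `parallel_restore_bo_add()` per
VRAM/GTT buffer object, then `send_parallel_restore_cmd()` and
`free_parallel_restore_cmd()`. A hedged usage sketch follows (placeholder
values, invented function name, error handling reduced to the minimum):

```c
#include <stdint.h>
#include "amdgpu_socket_utils.h"

/* Queue a single BO for the master restore process. All arguments are
 * placeholders supplied by the caller; in the real plugin they come from the
 * kfd image and the destination topology. */
static int example_send_one_bo(int img_id, int gpu_id, int render_minor,
			       int dmabuf_fd, uint64_t bo_size)
{
	parallel_restore_cmd cmd;
	int ret;

	/* Reserve room for one BO entry and one GPU mapping. */
	ret = init_parallel_restore_cmd(1, img_id, 1, &cmd);
	if (ret)
		return ret;

	/* Tell the master which DRM render minor serves this gpu_id. */
	parallel_restore_gpu_id_add(gpu_id, render_minor, &cmd);

	/* Queue the BO: its dmabuf fd, size, and read offset in the pages image. */
	parallel_restore_bo_add(dmabuf_fd, gpu_id, bo_size, 0, &cmd);

	/* Send head, gpu_ids, entries, and the dmabuf fd (via SCM_RIGHTS). */
	ret = send_parallel_restore_cmd(&cmd);

	free_parallel_restore_cmd(&cmd);
	return ret;
}
```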