From 62aadb22ab1efeccef7fb322f525bd1b2cb6969c Mon Sep 17 00:00:00 2001
From: Yanning Yang <yangyanning@sjtu.edu.cn>
Date: Fri, 14 Nov 2025 23:08:16 +0000
Subject: [PATCH] amdgpu: use 64-bit offsets for parallel restore

On AMD Instinct MI300 systems, restoring a large GPU application can
fail because the checkpoint is so large that the offset into the buffer
object contents, which is stored in an int, can no longer represent it.
The problem occurs when the total size of all buffer objects exceeds
INT_MAX. No single buffer has to be that large: a large number of small
buffers can trigger it as well.

Fixes: #2812

Signed-off-by: Yanning Yang <yangyanning@sjtu.edu.cn>
Signed-off-by: Radostin Stoyanov <rstoyanov@fedoraproject.org>
---
 plugins/amdgpu/amdgpu_plugin.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c
index 713ffed6e..574d7b829 100644
--- a/plugins/amdgpu/amdgpu_plugin.c
+++ b/plugins/amdgpu/amdgpu_plugin.c
@@ -1651,7 +1651,7 @@ static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKf
 {
 	struct thread_data *thread_datas = NULL;
 	int thread_i, ret = 0;
-	int offset = 0;
+	uint64_t offset = 0;
 
 	for (int i = 0; i < e->num_of_bos; i++) {
 		struct kfd_criu_bo_bucket *bo_bucket = &bo_buckets[i];
@@ -2283,7 +2283,7 @@ void *parallel_restore_bo_contents(void *_thread_data)
 			continue;
 
 		entry = &restore_cmd->entries[i];
-		fseek(bo_contents_fp, entry->read_offset + offset, SEEK_SET);
+		fseeko64(bo_contents_fp, entry->read_offset + offset, SEEK_SET);
 		ret = sdma_copy_bo(restore_cmd->fds_write[entry->write_id], entry->size, bo_contents_fp,
 				   buffer, buffer_size, h_dev,
 				   max_copy_size, SDMA_OP_VRAM_WRITE, false);
@@ -2410,4 +2410,4 @@ int amdgpu_plugin_post_forking(void)
 
 	return back_thread_create(&parallel_thread, restore_device_parallel_worker, &parallel_thread_result);
 }
-CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__POST_FORKING, amdgpu_plugin_post_forking)
\ No newline at end of file
+CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__POST_FORKING, amdgpu_plugin_post_forking)