diff --git a/.codespellrc b/.codespellrc index e91a6d2eb..5def594b2 100644 --- a/.codespellrc +++ b/.codespellrc @@ -1,3 +1,3 @@ [codespell] -skip = ./.git,./test/pki,./tags +skip = ./.git,./test/pki,./tags,./plugins/amdgpu/amdgpu_drm.h,./plugins/amdgpu/drm.h,./plugins/amdgpu/drm_mode.h ignore-words-list = creat,fpr,fle,ue,bord,parms,nd,te,testng,inh,wronly,renderd,bui,clen,sems diff --git a/Documentation/criu-amdgpu-plugin.txt b/Documentation/criu-amdgpu-plugin.txt index 68803f3db..fe76fc3bc 100644 --- a/Documentation/criu-amdgpu-plugin.txt +++ b/Documentation/criu-amdgpu-plugin.txt @@ -15,6 +15,7 @@ Checkpoint / Restore inside a docker container Pytorch Tensorflow Using CRIU Image Streamer +Parallel Restore DESCRIPTION ----------- diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 40ede84e2..0c9a9e527 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -502,8 +502,8 @@ Restores previously checkpointed processes. The 'resource' argument can be one of the following: + - **tty[**__rdev__**:**__dev__**]** - - **pipe[**__inode__**]** - - **socket[**__inode__*]* + - **pipe:[**__inode__**]** + - **socket:[**__inode__*]* - **file[**__mnt_id__**:**__inode__**]** - 'path/to/file' diff --git a/Documentation/logo.svg b/Documentation/logo.svg new file mode 100644 index 000000000..f713e72b7 --- /dev/null +++ b/Documentation/logo.svg @@ -0,0 +1,136 @@ + + + + + + + diff --git a/README.md b/README.md index f578e745c..6e2a0de9e 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![CircleCI](https://circleci.com/gh/checkpoint-restore/criu.svg?style=svg)]( https://circleci.com/gh/checkpoint-restore/criu) -
<p align="center"><img src="…"/></p>
+<p align="center"><img src="Documentation/logo.svg"/></p>

## CRIU -- A project to implement checkpoint/restore functionality for Linux diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/gcs-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/gcs-types.h new file mode 100644 index 000000000..9f9655e3b --- /dev/null +++ b/compel/arch/aarch64/src/lib/include/uapi/asm/gcs-types.h @@ -0,0 +1,47 @@ +#ifndef __UAPI_ASM_GCS_TYPES_H__ +#define __UAPI_ASM_GCS_TYPES_H__ + +#ifndef NT_ARM_GCS +#define NT_ARM_GCS 0x410 /* ARM GCS state */ +#endif + +/* Shadow Stack/Guarded Control Stack interface */ +#define PR_GET_SHADOW_STACK_STATUS 74 +#define PR_SET_SHADOW_STACK_STATUS 75 +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +/* When set PR_SHADOW_STACK_ENABLE flag allocates a Guarded Control Stack */ +#ifndef PR_SHADOW_STACK_ENABLE +#define PR_SHADOW_STACK_ENABLE (1UL << 0) +#endif + +/* Allows explicit GCS stores (eg. using GCSSTR) */ +#ifndef PR_SHADOW_STACK_WRITE +#define PR_SHADOW_STACK_WRITE (1UL << 1) +#endif + +/* Allows explicit GCS pushes (eg. using GCSPUSHM) */ +#ifndef PR_SHADOW_STACK_PUSH +#define PR_SHADOW_STACK_PUSH (1UL << 2) +#endif + +#ifndef SHADOW_STACK_SET_TOKEN +#define SHADOW_STACK_SET_TOKEN 0x1 /* Set up a restore token in the shadow stack */ +#endif + +#define PR_SHADOW_STACK_ALL_MODES \ + PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH + +/* copied from: arch/arm64/include/asm/sysreg.h */ +#define GCS_CAP_VALID_TOKEN 0x1 +#define GCS_CAP_ADDR_MASK 0xFFFFFFFFFFFFF000ULL +#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | GCS_CAP_VALID_TOKEN) +#define GCS_SIGNAL_CAP(addr) (((unsigned long)addr) & GCS_CAP_ADDR_MASK) + +#include + +#ifndef HWCAP_GCS +#define HWCAP_GCS (1UL << 32) +#endif + +#endif /* __UAPI_ASM_GCS_TYPES_H__ */ \ No newline at end of file diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h index 9d4ce7e2e..606c92ffe 100644 --- a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h +++ b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h @@ -2,6 +2,7 @@ #define UAPI_COMPEL_ASM_TYPES_H__ #include +#include #include #include #include @@ -16,7 +17,24 @@ */ typedef struct user_pt_regs user_regs_struct_t; -typedef struct user_fpsimd_state user_fpregs_struct_t; + +/* + * GCS (Guarded Control Stack) + * + * This mirrors the kernel definition but renamed to cr_user_gcs + * to avoid conflict with kernel headers (/usr/include/asm/ptrace.h). 
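+ *
+ * Note: user_fpregs_struct below bundles the FPSIMD state and the GCS state
+ * so that compel can fetch and restore them as a single extended register set.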
+ */ +struct cr_user_gcs { + __u64 features_enabled; + __u64 features_locked; + __u64 gcspr_el0; +}; + +struct user_fpregs_struct { + struct user_fpsimd_state fpstate; + struct cr_user_gcs gcs; +}; +typedef struct user_fpregs_struct user_fpregs_struct_t; #define __compel_arch_fetch_thread_area(tid, th) 0 #define compel_arch_fetch_thread_area(tctl) 0 @@ -39,4 +57,12 @@ typedef struct user_fpsimd_state user_fpregs_struct_t; __NR_##syscall; \ }) +extern bool __compel_host_supports_gcs(void); +#define compel_host_supports_gcs __compel_host_supports_gcs + +struct parasite_ctl; +extern int __parasite_setup_shstk(struct parasite_ctl *ctl, + user_fpregs_struct_t *ext_regs); +#define parasite_setup_shstk __parasite_setup_shstk + #endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h index a3528500d..7efee528f 100644 --- a/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h +++ b/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h @@ -10,11 +10,20 @@ /* Copied from the kernel header arch/arm64/include/uapi/asm/sigcontext.h */ #define FPSIMD_MAGIC 0x46508001 +#define GCS_MAGIC 0x47435300 typedef struct fpsimd_context fpu_state_t; +struct gcs_context { + struct _aarch64_ctx head; + __u64 gcspr; + __u64 features_enabled; + __u64 reserved; +}; + struct aux_context { struct fpsimd_context fpsimd; + struct gcs_context gcs; /* additional context to be added before "end" */ struct _aarch64_ctx end; }; @@ -63,6 +72,7 @@ struct cr_sigcontext { #define RT_SIGFRAME_AUX_CONTEXT(rt_sigframe) ((struct aux_context *)&(RT_SIGFRAME_SIGCONTEXT(rt_sigframe)->__reserved)) #define RT_SIGFRAME_FPU(rt_sigframe) (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->fpsimd) #define RT_SIGFRAME_OFFSET(rt_sigframe) 0 +#define RT_SIGFRAME_GCS(rt_sigframe) (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->gcs) #define rt_sigframe_erase_sigset(sigframe) memset(&sigframe->uc.uc_sigmask, 0, sizeof(k_rtsigset_t)) #define rt_sigframe_copy_sigset(sigframe, from) memcpy(&sigframe->uc.uc_sigmask, from, sizeof(k_rtsigset_t)) diff --git a/compel/arch/aarch64/src/lib/infect.c b/compel/arch/aarch64/src/lib/infect.c index ec1d0d59e..42f593c79 100644 --- a/compel/arch/aarch64/src/lib/infect.c +++ b/compel/arch/aarch64/src/lib/infect.c @@ -2,8 +2,8 @@ #include #include #include +#include #include -#include #include #include "common/page.h" @@ -13,6 +13,8 @@ #include "infect.h" #include "infect-priv.h" #include "asm/breakpoints.h" +#include "asm/gcs-types.h" +#include unsigned __page_size = 0; unsigned __page_shift = 0; @@ -33,24 +35,54 @@ static inline void __always_unused __check_code_syscall(void) BUILD_BUG_ON(!is_log2(sizeof(code_syscall))); } +bool __compel_host_supports_gcs(void) +{ + unsigned long hwcap = getauxval(AT_HWCAP); + return (hwcap & HWCAP_GCS) != 0; +} + +static bool __compel_gcs_enabled(struct cr_user_gcs *gcs) +{ + if (!compel_host_supports_gcs()) + return false; + + return gcs && (gcs->features_enabled & PR_SHADOW_STACK_ENABLE) != 0; +} + int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) { struct fpsimd_context *fpsimd = RT_SIGFRAME_FPU(sigframe); + struct gcs_context *gcs = RT_SIGFRAME_GCS(sigframe); memcpy(sigframe->uc.uc_mcontext.regs, regs->regs, sizeof(regs->regs)); + pr_debug("sigreturn_prep_regs_plain: sp %lx pc %lx\n", (long)regs->sp, (long)regs->pc); + sigframe->uc.uc_mcontext.sp = regs->sp; sigframe->uc.uc_mcontext.pc = regs->pc; 
sigframe->uc.uc_mcontext.pstate = regs->pstate; - memcpy(fpsimd->vregs, fpregs->vregs, 32 * sizeof(__uint128_t)); + memcpy(fpsimd->vregs, fpregs->fpstate.vregs, 32 * sizeof(__uint128_t)); - fpsimd->fpsr = fpregs->fpsr; - fpsimd->fpcr = fpregs->fpcr; + fpsimd->fpsr = fpregs->fpstate.fpsr; + fpsimd->fpcr = fpregs->fpstate.fpcr; fpsimd->head.magic = FPSIMD_MAGIC; fpsimd->head.size = sizeof(*fpsimd); + if (__compel_gcs_enabled(&fpregs->gcs)) { + gcs->head.magic = GCS_MAGIC; + gcs->head.size = sizeof(*gcs); + gcs->reserved = 0; + gcs->gcspr = fpregs->gcs.gcspr_el0 - 8; + gcs->features_enabled = fpregs->gcs.features_enabled; + + pr_debug("sigframe gcspr=%llx features_enabled=%llx\n", fpregs->gcs.gcspr_el0 - 8, fpregs->gcs.features_enabled); + } else { + pr_debug("sigframe gcspr=[disabled]\n"); + memset(gcs, 0, sizeof(*gcs)); + } + return 0; } @@ -59,7 +91,7 @@ int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigfr return 0; } -int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *fpsimd, save_regs_t save, +int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save, void *arg, __maybe_unused unsigned long flags) { struct iovec iov; @@ -74,14 +106,28 @@ int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct goto err; } - iov.iov_base = fpsimd; - iov.iov_len = sizeof(*fpsimd); + iov.iov_base = &ext_regs->fpstate; + iov.iov_len = sizeof(ext_regs->fpstate); if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov))) { pr_perror("Failed to obtain FPU registers for %d", pid); goto err; } - ret = save(pid, arg, regs, fpsimd); + memset(&ext_regs->gcs, 0, sizeof(ext_regs->gcs)); + + iov.iov_base = &ext_regs->gcs; + iov.iov_len = sizeof(ext_regs->gcs); + if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &iov) == 0) { + pr_info("gcs: GCSPR_EL0 for %d: 0x%llx, features: 0x%llx\n", + pid, ext_regs->gcs.gcspr_el0, ext_regs->gcs.features_enabled); + + if (!__compel_gcs_enabled(&ext_regs->gcs)) + pr_info("gcs: GCS is NOT enabled\n"); + } else { + pr_info("gcs: GCS state not available for %d\n", pid); + } + + ret = save(pid, arg, regs, ext_regs); err: return ret; } @@ -90,14 +136,44 @@ int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs) { struct iovec iov; + struct cr_user_gcs gcs; + struct iovec gcs_iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) }; + pr_info("Restoring GP/FPU registers for %d\n", pid); - iov.iov_base = ext_regs; - iov.iov_len = sizeof(*ext_regs); + iov.iov_base = &ext_regs->fpstate; + iov.iov_len = sizeof(ext_regs->fpstate); if (ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov)) { pr_perror("Failed to set FPU registers for %d", pid); return -1; } + + if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) < 0) { + pr_warn("gcs: Failed to get GCS for %d\n", pid); + } else { + ext_regs->gcs = gcs; + compel_set_task_gcs_regs(pid, ext_regs); + } + + return 0; +} + +int compel_set_task_gcs_regs(pid_t pid, user_fpregs_struct_t *ext_regs) +{ + struct iovec iov; + + pr_info("gcs: restoring GCS registers for %d\n", pid); + pr_info("gcs: restoring GCS: gcspr=%llx features=%llx\n", + ext_regs->gcs.gcspr_el0, ext_regs->gcs.features_enabled); + + iov.iov_base = &ext_regs->gcs; + iov.iov_len = sizeof(ext_regs->gcs); + + if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_GCS, &iov)) { + pr_perror("gcs: Failed to set GCS registers for %d", pid); + return -1; + } + return 0; } @@ -286,3 +362,68 @@ int ptrace_flush_breakpoints(pid_t pid) return 0; } + +int 
inject_gcs_cap_token(struct parasite_ctl *ctl, pid_t pid, struct cr_user_gcs *gcs) +{ + struct iovec gcs_iov = { .iov_base = gcs, .iov_len = sizeof(*gcs) }; + + uint64_t token_addr = gcs->gcspr_el0 - 8; + uint64_t sigtramp_addr = gcs->gcspr_el0 - 16; + + uint64_t cap_token = ALIGN_DOWN(GCS_SIGNAL_CAP(token_addr), 8); + unsigned long restorer_addr; + + pr_info("gcs: (setup) CAP token: 0x%lx at addr: 0x%lx\n", cap_token, token_addr); + + /* Inject capability token at gcspr_el0 - 8 */ + if (ptrace(PTRACE_POKEDATA, pid, (void *)token_addr, cap_token)) { + pr_perror("gcs: (setup) Inject GCS cap token failed"); + return -1; + } + + /* Inject restorer trampoline address (gcspr_el0 - 16) */ + restorer_addr = ctl->parasite_ip; + if (ptrace(PTRACE_POKEDATA, pid, (void *)sigtramp_addr, restorer_addr)) { + pr_perror("gcs: (setup) Inject GCS restorer failed"); + return -1; + } + + /* Update GCSPR_EL0 */ + gcs->gcspr_el0 = token_addr; + if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_GCS, &gcs_iov)) { + pr_perror("gcs: PTRACE_SETREGS FAILED"); + return -1; + } + + pr_debug("gcs: parasite_ip=%#lx sp=%#llx gcspr_el0=%#llx\n", + ctl->parasite_ip, ctl->orig.regs.sp, gcs->gcspr_el0); + + return 0; +} + +int parasite_setup_shstk(struct parasite_ctl *ctl, user_fpregs_struct_t *ext_regs) +{ + struct cr_user_gcs gcs; + struct iovec gcs_iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) }; + pid_t pid = ctl->rpid; + + if(!__compel_host_supports_gcs()) + return 0; + + if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) != 0) { + pr_perror("GCS state not available for %d", pid); + return -1; + } + + if (!__compel_gcs_enabled(&gcs)) + return 0; + + if (inject_gcs_cap_token(ctl, pid, &gcs)) { + pr_perror("Failed to inject GCS cap token for %d", pid); + return -1; + } + + pr_info("gcs: GCS enabled for %d\n", pid); + + return 0; +} diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def index 9a33009eb..f4deb02b2 100644 --- a/compel/arch/arm/plugins/std/syscalls/syscall.def +++ b/compel/arch/arm/plugins/std/syscalls/syscall.def @@ -85,7 +85,7 @@ timer_settime 110 258 (kernel_timer_t timer_id, int flags, const struct itimer timer_gettime 108 259 (int timer_id, const struct itimerspec *setting) timer_getoverrun 109 260 (int timer_id) timer_delete 111 261 (kernel_timer_t timer_id) -clock_gettime 113 263 (const clockid_t which_clock, const struct timespec *tp) +clock_gettime 113 263 (clockid_t which_clock, struct timespec *tp) exit_group 94 248 (int error_code) set_robust_list 99 338 (struct robust_list_head *head, size_t len) get_robust_list 100 339 (int pid, struct robust_list_head **head_ptr, size_t *len_ptr) @@ -124,3 +124,4 @@ openat2 437 437 (int dirfd, char *pathname, struct open_how *how, size_t size pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags) rseq 293 398 (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) membarrier 283 389 (int cmd, unsigned int flags, int cpu_id) +map_shadow_stack 453 ! 
(unsigned long addr, unsigned long size, unsigned int flags) \ No newline at end of file diff --git a/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl b/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl index aa6ffb44d..83dcdab4a 100644 --- a/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl @@ -46,7 +46,7 @@ __NR_sys_timer_gettime 108 sys_timer_gettime (int timer_id, const struct itimer __NR_sys_timer_getoverrun 109 sys_timer_getoverrun (int timer_id) __NR_sys_timer_settime 110 sys_timer_settime (kernel_timer_t timer_id, int flags, const struct itimerspec *new_setting, struct itimerspec *old_setting) __NR_sys_timer_delete 111 sys_timer_delete (kernel_timer_t timer_id) -__NR_clock_gettime 113 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp) +__NR_clock_gettime 113 sys_clock_gettime (clockid_t which_clock, struct timespec *tp) __NR_sched_setscheduler 119 sys_sched_setscheduler (int pid, int policy, struct sched_param *p) __NR_restart_syscall 128 sys_restart_syscall (void) __NR_kill 129 sys_kill (long pid, int sig) diff --git a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl index 85faca5a9..ad3d44634 100644 --- a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl @@ -84,7 +84,7 @@ __NR_sys_timer_settime 5217 sys_timer_settime (kernel_timer_t timer_id, int fl __NR_sys_timer_gettime 5218 sys_timer_gettime (int timer_id, const struct itimerspec *setting) __NR_sys_timer_getoverrun 5219 sys_timer_getoverrun (int timer_id) __NR_sys_timer_delete 5220 sys_timer_delete (kernel_timer_t timer_id) -__NR_clock_gettime 5222 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp) +__NR_clock_gettime 5222 sys_clock_gettime (clockid_t which_clock, struct timespec *tp) __NR_exit_group 5205 sys_exit_group (int error_code) __NR_set_thread_area 5242 sys_set_thread_area (unsigned long *addr) __NR_openat 5247 sys_openat (int dfd, const char *filename, int flags, int mode) diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl index c56b4e6de..3deb41cf7 100644 --- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl @@ -82,7 +82,7 @@ __NR_sys_timer_settime 241 sys_timer_settime (kernel_timer_t timer_id, int flag __NR_sys_timer_gettime 242 sys_timer_gettime (int timer_id, const struct itimerspec *setting) __NR_sys_timer_getoverrun 243 sys_timer_getoverrun (int timer_id) __NR_sys_timer_delete 244 sys_timer_delete (kernel_timer_t timer_id) -__NR_clock_gettime 246 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp) +__NR_clock_gettime 246 sys_clock_gettime (clockid_t which_clock, struct timespec *tp) __NR_exit_group 234 sys_exit_group (int error_code) __NR_waitid 272 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru) __NR_set_robust_list 300 sys_set_robust_list (struct robust_list_head *head, size_t len) diff --git a/compel/arch/riscv64/plugins/std/syscalls/syscall.def b/compel/arch/riscv64/plugins/std/syscalls/syscall.def index 17f763e90..967f097f9 100644 --- a/compel/arch/riscv64/plugins/std/syscalls/syscall.def +++ b/compel/arch/riscv64/plugins/std/syscalls/syscall.def @@ -85,7 +85,7 @@ timer_settime 110 258 (kernel_timer_t timer_id, int flags, const struct 
itimer timer_gettime 108 259 (int timer_id, const struct itimerspec *setting) timer_getoverrun 109 260 (int timer_id) timer_delete 111 261 (kernel_timer_t timer_id) -clock_gettime 113 263 (const clockid_t which_clock, const struct timespec *tp) +clock_gettime 113 263 (clockid_t which_clock, struct timespec *tp) exit_group 94 248 (int error_code) set_robust_list 99 338 (struct robust_list_head *head, size_t len) get_robust_list 100 339 (int pid, struct robust_list_head **head_ptr, size_t *len_ptr) diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl index 018d58a59..ff2f33006 100644 --- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl @@ -82,7 +82,7 @@ __NR_sys_timer_settime 255 sys_timer_settime (kernel_timer_t timer_id, int flag __NR_sys_timer_gettime 256 sys_timer_gettime (int timer_id, const struct itimerspec *setting) __NR_sys_timer_getoverrun 257 sys_timer_getoverrun (int timer_id) __NR_sys_timer_delete 258 sys_timer_delete (kernel_timer_t timer_id) -__NR_clock_gettime 260 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp) +__NR_clock_gettime 260 sys_clock_gettime (clockid_t which_clock, struct timespec *tp) __NR_exit_group 248 sys_exit_group (int error_code) __NR_waitid 281 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru) __NR_set_robust_list 304 sys_set_robust_list (struct robust_list_head *head, size_t len) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl index 7fbfd69ad..8c3620c2a 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl @@ -85,7 +85,7 @@ __NR_sys_timer_settime 223 sys_timer_settime (kernel_timer_t timer_id, int fla __NR_sys_timer_gettime 224 sys_timer_gettime (int timer_id, const struct itimerspec *setting) __NR_sys_timer_getoverrun 225 sys_timer_getoverrun (int timer_id) __NR_sys_timer_delete 226 sys_timer_delete (kernel_timer_t timer_id) -__NR_clock_gettime 228 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp) +__NR_clock_gettime 228 sys_clock_gettime (clockid_t which_clock, struct timespec *tp) __NR_exit_group 231 sys_exit_group (int error_code) __NR_openat 257 sys_openat (int dfd, const char *filename, int flags, int mode) __NR_waitid 247 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru) diff --git a/compel/arch/x86/src/lib/infect.c b/compel/arch/x86/src/lib/infect.c index 644c483b4..afcf2c53b 100644 --- a/compel/arch/x86/src/lib/infect.c +++ b/compel/arch/x86/src/lib/infect.c @@ -761,7 +761,7 @@ bool __compel_shstk_enabled(user_fpregs_struct_t *ext_regs) return false; } -int parasite_setup_shstk(struct parasite_ctl *ctl, user_fpregs_struct_t *ext_regs) +int parasite_setup_shstk(struct parasite_ctl *ctl, __maybe_unused user_fpregs_struct_t *ext_regs) { pid_t pid = ctl->rpid; unsigned long sa_restorer = ctl->parasite_ip; diff --git a/compel/include/infect-priv.h b/compel/include/infect-priv.h index 9d3442839..8e78a7f6c 100644 --- a/compel/include/infect-priv.h +++ b/compel/include/infect-priv.h @@ -72,6 +72,7 @@ extern bool arch_can_dump_task(struct parasite_ctl *ctl); extern int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save, void *arg, unsigned long flags); extern int compel_set_task_ext_regs(pid_t pid, 
user_fpregs_struct_t *ext_regs); +extern int compel_set_task_gcs_regs(pid_t pid, user_fpregs_struct_t *ext_regs); extern int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s); extern int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs); diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h index 1f61876ff..d21c261b7 100644 --- a/compel/include/uapi/infect.h +++ b/compel/include/uapi/infect.h @@ -192,6 +192,14 @@ void compel_set_thread_ip(struct parasite_thread_ctl *tctl, uint64_t v); extern void compel_get_stack(struct parasite_ctl *ctl, void **rstack, void **r_thread_stack); +#ifndef compel_host_supports_gcs +static inline bool compel_host_supports_gcs(void) +{ + return false; +} +#define compel_host_supports_gcs +#endif + #ifndef compel_shstk_enabled static inline bool compel_shstk_enabled(user_fpregs_struct_t *ext_regs) { diff --git a/compel/test/infect/Makefile b/compel/test/infect/Makefile index bacfad962..85efa5fd9 100644 --- a/compel/test/infect/Makefile +++ b/compel/test/infect/Makefile @@ -3,6 +3,11 @@ CFLAGS ?= -O2 -g -Wall -Werror COMPEL := ../../../compel/compel-host +ifeq ($(GCS_ENABLE),1) +CFLAGS += -mbranch-protection=standard -DGCS_TEST_ENABLE=1 +LDFLAGS += -z experimental-gcs=check +endif + all: victim spy run: @@ -17,7 +22,7 @@ clean: rm -f parasite.o victim: victim.c - $(CC) $(CFLAGS) -o $@ $^ + $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) spy: spy.c parasite.h $(CC) $(CFLAGS) $(shell $(COMPEL) includes) -o $@ $< $(shell $(COMPEL) --static libs) diff --git a/compel/test/infect/spy.c b/compel/test/infect/spy.c index b10db4d47..143946941 100644 --- a/compel/test/infect/spy.c +++ b/compel/test/infect/spy.c @@ -112,6 +112,9 @@ int main(int argc, char **argv) return -1; } +#ifdef GCS_TEST_ENABLE + setenv("GLIBC_TUNABLES", "glibc.cpu.aarch64_gcs=1:glibc.cpu.aarch64_gcs_policy=2", 1); +#endif pid = vfork(); if (pid == 0) { close(p_in[1]); diff --git a/contrib/dependencies/apt-packages.sh b/contrib/dependencies/apt-packages.sh index 1fd42d4e6..7963be7b4 100755 --- a/contrib/dependencies/apt-packages.sh +++ b/contrib/dependencies/apt-packages.sh @@ -13,6 +13,7 @@ fi build-essential \ gdb \ git-core \ + iproute2 \ iptables \ kmod \ libaio-dev \ diff --git a/criu/arch/aarch64/Makefile b/criu/arch/aarch64/Makefile index b26487367..b87fcaa5b 100644 --- a/criu/arch/aarch64/Makefile +++ b/criu/arch/aarch64/Makefile @@ -6,3 +6,4 @@ obj-y += cpu.o obj-y += crtools.o obj-y += sigframe.o obj-y += bitops.o +obj-y += gcs.o \ No newline at end of file diff --git a/criu/arch/aarch64/crtools.c b/criu/arch/aarch64/crtools.c index 3ed5c9d63..2e89f9ce3 100644 --- a/criu/arch/aarch64/crtools.c +++ b/criu/arch/aarch64/crtools.c @@ -12,6 +12,7 @@ #include "common/compiler.h" #include #include "asm/dump.h" +#include "asm/gcs-types.h" #include "protobuf.h" #include "images/core.pb-c.h" #include "images/creds.pb-c.h" @@ -22,6 +23,7 @@ #include "restorer.h" #include "compel/infect.h" #include "pstree.h" +#include /* * cr_user_pac_* are a copy of the corresponding uapi structs @@ -146,6 +148,11 @@ static int save_pac_keys(int pid, CoreEntry *core) int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpsimd) { int i; + struct cr_user_gcs gcs_live; + struct iovec gcs_iov = { + .iov_base = &gcs_live, + .iov_len = sizeof(gcs_live), + }; CoreEntry *core = x; // Save the Aarch64 CPU state @@ -157,14 +164,25 @@ int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_str // 
Save the FP/SIMD state for (i = 0; i < 32; ++i) { - core->ti_aarch64->fpsimd->vregs[2 * i] = fpsimd->vregs[i]; - core->ti_aarch64->fpsimd->vregs[2 * i + 1] = fpsimd->vregs[i] >> 64; + core->ti_aarch64->fpsimd->vregs[2 * i] = fpsimd->fpstate.vregs[i]; + core->ti_aarch64->fpsimd->vregs[2 * i + 1] = fpsimd->fpstate.vregs[i] >> 64; } - assign_reg(core->ti_aarch64->fpsimd, fpsimd, fpsr); - assign_reg(core->ti_aarch64->fpsimd, fpsimd, fpcr); + assign_reg(core->ti_aarch64->fpsimd, &fpsimd->fpstate, fpsr); + assign_reg(core->ti_aarch64->fpsimd, &fpsimd->fpstate, fpcr); if (save_pac_keys(pid, core)) return -1; + + /* Save the GCS state */ + if (compel_host_supports_gcs()) { + if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) < 0) { + pr_perror("Failed to get GCS for %d", pid); + return -1; + } + core->ti_aarch64->gcs->gcspr_el0 = gcs_live.gcspr_el0; + core->ti_aarch64->gcs->features_enabled = gcs_live.features_enabled; + } + return 0; } @@ -173,6 +191,7 @@ int arch_alloc_thread_info(CoreEntry *core) ThreadInfoAarch64 *ti_aarch64; UserAarch64RegsEntry *gpregs; UserAarch64FpsimdContextEntry *fpsimd; + UserAarch64GcsEntry *gcs; ti_aarch64 = xmalloc(sizeof(*ti_aarch64)); if (!ti_aarch64) @@ -202,6 +221,15 @@ int arch_alloc_thread_info(CoreEntry *core) if (!fpsimd->vregs) goto err; + /* Allocate & init GCS */ + if (compel_host_supports_gcs()) { + gcs = xmalloc(sizeof(*gcs)); + if (!gcs) + goto err; + user_aarch64_gcs_entry__init(gcs); + ti_aarch64->gcs = gcs; + } + return 0; err: return -1; @@ -231,6 +259,7 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) { int i; struct fpsimd_context *fpsimd = RT_SIGFRAME_FPU(sigframe); + struct gcs_context *gcs; if (core->ti_aarch64->fpsimd->n_vregs != 64) return 1; @@ -244,6 +273,18 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) fpsimd->head.magic = FPSIMD_MAGIC; fpsimd->head.size = sizeof(*fpsimd); + if (compel_host_supports_gcs()) { + gcs = RT_SIGFRAME_GCS(sigframe); + + pr_debug("sigframe gcspr %llx enabled %llx\n", gcs->gcspr, gcs->features_enabled); + + gcs->head.magic = GCS_MAGIC; + gcs->head.size = sizeof(*gcs); + gcs->reserved = 0; + gcs->gcspr = core->ti_aarch64->gcs->gcspr_el0 - 8; + gcs->features_enabled = core->ti_aarch64->gcs->features_enabled; + } + return 0; } diff --git a/criu/arch/aarch64/gcs.c b/criu/arch/aarch64/gcs.c new file mode 100644 index 000000000..4bdb9d2e4 --- /dev/null +++ b/criu/arch/aarch64/gcs.c @@ -0,0 +1,157 @@ +#include +#include + +#include +#include + +#include "asm/gcs-types.h" +#include "pstree.h" +#include "restorer.h" +#include "rst-malloc.h" +#include "vma.h" + +#include +#include + +static bool task_has_gcs_enabled(UserAarch64GcsEntry *gcs) +{ + return gcs && (gcs->features_enabled & PR_SHADOW_STACK_ENABLE) != 0; +} + +static bool host_supports_gcs(void) +{ + unsigned long hwcap = getauxval(AT_HWCAP); + return (hwcap & HWCAP_GCS) != 0; +} + +static bool task_needs_gcs(struct pstree_item *item, CoreEntry *core) +{ + UserAarch64GcsEntry *gcs; + + if (!task_alive(item)) + return false; + + gcs = core->ti_aarch64->gcs; + + if (task_has_gcs_enabled(gcs)) { + if (!host_supports_gcs()) { + pr_warn_once("Restoring task with GCS on non-GCS host\n"); + return false; + } + + pr_info("Restoring task with GCS\n"); + return true; + } + + pr_info("Restoring a task without GCS\n"); + return false; +} + +static int gcs_prepare_task(struct vm_area_list *vmas, + struct rst_shstk_info *gcs) +{ + struct vma_area *vma; + + list_for_each_entry(vma, &vmas->h, list) { + if (vma_area_is(vma, VMA_AREA_SHSTK) 
&& + in_vma_area(vma, gcs->gcspr_el0)) { + unsigned long premapped_addr = vma->premmaped_addr; + unsigned long size = vma_area_len(vma); + + gcs->vma_start = vma->e->start; + gcs->vma_size = size; + gcs->premapped_addr = premapped_addr; + + return 0; + } + } + + pr_err("Unable to find a shadow stack vma: %lx\n", gcs->gcspr_el0); + return -1; +} + +int arch_gcs_prepare(struct pstree_item *item, CoreEntry *core, + struct task_restore_args *ta) +{ + int i; + struct thread_restore_args *args_array = (struct thread_restore_args *)(&ta[1]); + struct vm_area_list *vmas = &rsti(item)->vmas; + struct rst_shstk_info *gcs = &ta->shstk; + + if (!task_needs_gcs(item, core)) + return 0; + + gcs->gcspr_el0 = core->ti_aarch64->gcs->gcspr_el0; + gcs->features_enabled = core->ti_aarch64->gcs->features_enabled; + + if (gcs_prepare_task(vmas, gcs)) { + pr_err("gcs: failed to prepare shadow stack memory\n"); + return -1; + } + + for (i = 0; i < item->nr_threads; i++) { + struct thread_restore_args *thread_args = &args_array[i]; + + core = item->core[i]; + gcs = &thread_args->shstk; + + gcs->gcspr_el0 = core->ti_aarch64->gcs->gcspr_el0; + gcs->features_enabled = core->ti_aarch64->gcs->features_enabled; + + if (gcs_prepare_task(vmas, gcs)) { + pr_err("gcs: failed to prepare GCS memory\n"); + return -1; + } + } + + return 0; +} + +int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core, + int (*func)(void *arg), void *arg) +{ + int fret; + unsigned long flags = PR_SHADOW_STACK_ENABLE | + PR_SHADOW_STACK_PUSH | + PR_SHADOW_STACK_WRITE; + + long ret, x1_after, x8_after; + + /* If task doesn't need GCS, just call func */ + if (!task_needs_gcs(item, core)) { + return func(arg); + } + + pr_debug("gcs: GCS enable SVC about to fire: x8=%d x0=%d x1=0x%lx\n", + __NR_prctl, PR_SET_SHADOW_STACK_STATUS, flags); + + asm volatile( + "mov x0, %3\n" // x0 = PR_SET_SHADOW_STACK_STATUS (75) + "mov x1, %4\n" // x1 = flags + "mov x2, xzr\n" // x2 = 0 + "mov x3, xzr\n" // x3 = 0 + "mov x4, xzr\n" // x4 = 0 + "mov x8, %5\n" // x8 = __NR_prctl (167) + "svc #0\n" // Invoke syscall + "mov %0, x0\n" // Capture return value + "mov %1, x1\n" // Capture x1 after + "mov %2, x8\n" // Capture x8 after + : "=r"(ret), "=r"(x1_after), "=r"(x8_after) + : "i"(PR_SET_SHADOW_STACK_STATUS), // x0 - %3rd + "r"(flags), // x1 - %4th + "i"(__NR_prctl) // x8 - %5th + : "x0", "x1", "x2", "x3", "x4", "x8", "memory", "cc"); + + pr_info("gcs: after SVC: ret=%ld x1=%ld x8=%ld\n", ret, x1_after, x8_after); + + if (ret != 0) { + int err = errno; + pr_err("gcs: failed to enable GCS: ret=%ld errno=%d (%s)\n", ret, err, strerror(err)); + return -1; + } + + fret = func(arg); + exit(fret); + + return -1; +} diff --git a/criu/arch/aarch64/include/asm/gcs.h b/criu/arch/aarch64/include/asm/gcs.h new file mode 100644 index 000000000..28faa23b7 --- /dev/null +++ b/criu/arch/aarch64/include/asm/gcs.h @@ -0,0 +1,196 @@ +#ifndef __CR_ASM_GCS_H__ +#define __CR_ASM_GCS_H__ + +#include + +struct rst_shstk_info { + unsigned long vma_start; /* start of GCS VMA */ + unsigned long vma_size; /* size of GCS VMA */ + unsigned long premapped_addr; /* premapped buffer */ + unsigned long tmp_gcs; /* temp area for GCS if needed */ + u64 gcspr_el0; /* GCS pointer */ + u64 features_enabled; /* GCS flags */ +}; + +#define rst_shstk_info rst_shstk_info + +struct task_restore_args; +struct pstree_item; + +int arch_gcs_prepare(struct pstree_item *item, CoreEntry *core, + struct task_restore_args *ta); +#define arch_shstk_prepare arch_gcs_prepare + +int arch_shstk_trampoline(struct 
pstree_item *item, CoreEntry *core, + int (*func)(void *arg), void *arg); +#define arch_shstk_trampoline arch_shstk_trampoline + +static always_inline void shstk_set_restorer_stack(struct rst_shstk_info *gcs, void *ptr) +{ + gcs->tmp_gcs = (long unsigned)ptr; +} +#define shstk_set_restorer_stack shstk_set_restorer_stack + +static always_inline long shstk_restorer_stack_size(void) +{ + return PAGE_SIZE; +} +#define shstk_restorer_stack_size shstk_restorer_stack_size + +#ifdef CR_NOGLIBC +#include +#include +#include "vma.h" + +static inline unsigned long gcs_map(unsigned long addr, unsigned long size, unsigned int flags) +{ + long gcspr = sys_map_shadow_stack(addr, size, flags); + pr_info("gcs: syscall: map_shadow_stack at=%lx size=%ld\n", addr, size); + + if (gcspr < 0) { + pr_err("gcs: failed to map GCS at %lx: %ld\n", addr, gcspr); + return -1; + } + + if (addr && gcspr != addr) { + pr_err("gcs: address mismatch: need %lx, got %lx\n", addr, gcspr); + return -1; + } + + pr_info("gcs: mmapped GCS at %lx\n", gcspr); + + return gcspr; +} + +/* clang-format off */ +static always_inline void gcsss1(unsigned long *Xt) +{ + asm volatile ( + "sys #3, C7, C7, #2, %0\n" + : + : "rZ" (Xt) + : "memory"); +} + +static always_inline unsigned long *gcsss2(void) +{ + unsigned long *Xt; + + asm volatile ( + "SYSL %0, #3, C7, C7, #3\n" + : "=r" (Xt) + : + : "memory"); + + return Xt; +} + +static inline void gcsstr(unsigned long addr, unsigned long val) +{ + asm volatile( + "mov x0, %0\n" + "mov x1, %1\n" + ".inst 0xd91f1c01\n" // GCSSTR x1, [x0] + "mov x0, #0\n" + : + : "r"(addr), "r"(val) + : "x0", "x1", "memory"); +} +/* clang-format on */ + +static always_inline int gcs_restore(struct rst_shstk_info *gcs) +{ + unsigned long gcspr, val; + + if (!(gcs && gcs->features_enabled & PR_SHADOW_STACK_ENABLE)) { + return 0; + } + + gcspr = gcs->gcspr_el0 - 8; + + val = ALIGN_DOWN(GCS_SIGNAL_CAP(gcspr), 8); + pr_debug("gcs: [0] GCSSTR VAL=%lx write at GCSPR=%lx\n", val, gcspr); + gcsstr(gcspr, val); + + val = ALIGN_DOWN(GCS_SIGNAL_CAP(gcspr), 8) | GCS_CAP_VALID_TOKEN; + gcspr -= 8; + pr_debug("gcs: [1] GCSSTR VAL=%lx write at GCSPR=%lx\n", val, gcspr); + gcsstr(gcspr, val); + + pr_debug("gcs: about to switch stacks via GCSSS1 to: %lx\n", gcspr); + gcsss1((unsigned long *)gcspr); + return 0; +} +#define arch_shstk_restore gcs_restore + +static always_inline int gcs_vma_restore(VmaEntry *vma_entry) +{ + unsigned long shstk, i, ret; + unsigned long *gcs_data = (void *)vma_premmaped_start(vma_entry); + unsigned long vma_size = vma_entry_len(vma_entry); + + shstk = gcs_map(0, vma_size, SHADOW_STACK_SET_TOKEN); + if (shstk < 0) { + pr_err("Failed to map shadow stack at %lx: %ld\n", shstk, shstk); + } + + /* restore shadow stack contents */ + for (i = 0; i < vma_size / 8; i++) + gcsstr(shstk + i * 8, gcs_data[i]); + + pr_debug("unmap %lx %ld\n", (unsigned long)gcs_data, vma_size); + ret = sys_munmap(gcs_data, vma_size); + if (ret < 0) { + pr_err("Failed to unmap premmaped shadow stack\n"); + return ret; + } + + vma_premmaped_start(vma_entry) = shstk; + + return 0; +} +#define shstk_vma_restore gcs_vma_restore + +static always_inline int gcs_switch_to_restorer(struct rst_shstk_info *gcs) +{ + int ret; + unsigned long *ssp; + unsigned long addr; + unsigned long gcspr; + + if (!(gcs && gcs->features_enabled & PR_SHADOW_STACK_ENABLE)) { + return 0; + } + + pr_debug("gcs->premapped_addr + gcs->vma_size = %lx\n", gcs->premapped_addr + gcs->vma_size); + pr_debug("gcs->tmp_gcs = %lx\n", gcs->tmp_gcs); + addr = gcs->tmp_gcs; + + 
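+	/*
+	 * The temporary restorer GCS must be page-aligned: the area is unmapped
+	 * as ordinary memory and remapped as shadow-stack memory just below,
+	 * and map_shadow_stack() operates on whole pages.
+	 */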
if (addr % PAGE_SIZE != 0) { + pr_err("gcs: 0x%lx not page-aligned to size 0x%lx\n", addr, PAGE_SIZE); + return -1; + } + + ret = sys_munmap((void *)addr, PAGE_SIZE); + if (ret < 0) { + pr_err("gcs: Failed to unmap aarea for dumpee GCS VMAs\n"); + return -1; + } + + gcspr = gcs_map(addr, PAGE_SIZE, SHADOW_STACK_SET_TOKEN); + + if (gcspr == -1) { + pr_err("gcs: failed to gcs_map(%lx, %lx)\n", (unsigned long)addr, PAGE_SIZE); + return -1; + } + + ssp = (unsigned long *)(addr + PAGE_SIZE - 8); + gcsss1(ssp); + + return 0; +} +#define arch_shstk_switch_to_restorer gcs_switch_to_restorer + +#endif /* CR_NOGLIBC */ + +#endif /* __CR_ASM_GCS_H__ */ diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h index 2174df4fa..8f3edc257 100644 --- a/criu/arch/aarch64/include/asm/restorer.h +++ b/criu/arch/aarch64/include/asm/restorer.h @@ -5,6 +5,7 @@ #include #include "asm/types.h" +#include "asm/gcs.h" #include "images/core.pb-c.h" #include diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 60b8e793c..a58aaf34a 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -2138,8 +2138,8 @@ int cr_dump_tasks(pid_t pid) InventoryEntry he = INVENTORY_ENTRY__INIT; InventoryEntry *parent_ie = NULL; struct pstree_item *item; - int pre_dump_ret = 0; - int ret = -1; + int ret; + int exit_code = -1; kerndat_warn_about_madv_guards(); @@ -2159,9 +2159,9 @@ int cr_dump_tasks(pid_t pid) goto err; root_item->pid->real = pid; - pre_dump_ret = run_scripts(ACT_PRE_DUMP); - if (pre_dump_ret != 0) { - pr_err("Pre dump script failed with %d!\n", pre_dump_ret); + ret = run_scripts(ACT_PRE_DUMP); + if (ret != 0) { + pr_err("Pre dump script failed with %d!\n", ret); goto err; } if (init_stats(DUMP_STATS)) @@ -2247,6 +2247,10 @@ int cr_dump_tasks(pid_t pid) goto err; } + ret = run_plugins(DUMP_DEVICES_LATE, pid); + if (ret && ret != -ENOTSUP) + goto err; + if (parent_ie) { inventory_entry__free_unpacked(parent_ie, NULL); parent_ie = NULL; @@ -2283,39 +2287,32 @@ int cr_dump_tasks(pid_t pid) * ipc shared memory, but an ipc namespace is dumped in a child * process. 
*/ - ret = cr_dump_shmem(); - if (ret) + if (cr_dump_shmem()) goto err; if (root_ns_mask) { - ret = dump_namespaces(root_item, root_ns_mask); - if (ret) + if (dump_namespaces(root_item, root_ns_mask)) goto err; } if ((root_ns_mask & CLONE_NEWTIME) == 0) { - ret = dump_time_ns(0); - if (ret) + if (dump_time_ns(0)) goto err; } if (dump_aa_namespaces() < 0) goto err; - ret = dump_cgroups(); - if (ret) + if (dump_cgroups()) goto err; - ret = fix_external_unix_sockets(); - if (ret) + if (fix_external_unix_sockets()) goto err; - ret = tty_post_actions(); - if (ret) + if (tty_post_actions()) goto err; - ret = inventory_save_uptime(&he); - if (ret) + if (inventory_save_uptime(&he)) goto err; he.has_pre_dump_mode = false; @@ -2324,12 +2321,10 @@ int cr_dump_tasks(pid_t pid) he.allow_uprobes = true; } - ret = write_img_inventory(&he); - if (ret) - goto err; + exit_code = write_img_inventory(&he); err: if (parent_ie) inventory_entry__free_unpacked(parent_ie, NULL); - return cr_dump_finish(ret); + return cr_dump_finish(exit_code); } diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 057ec0e93..b92b92715 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1238,7 +1238,7 @@ static inline int fork_with_pid(struct pstree_item *item) pr_debug("PID: real %d virt %d\n", item->pid->real, vpid(item)); } - arch_shstk_unlock(item, ca.core, pid); + arch_shstk_unlock(item, ca.core, ret); err_unlock: if (!(ca.clone_flags & CLONE_NEWPID)) @@ -2440,6 +2440,7 @@ static long restorer_get_vma_hint(struct list_head *tgt_vma_list, struct list_he end_vma.e = &end_e; end_e.start = end_e.end = kdat.task_size; + INIT_LIST_HEAD(&end_vma.list); s_vma = list_first_entry(self_vma_list, struct vma_area, list); t_vma = list_first_entry(tgt_vma_list, struct vma_area, list); diff --git a/criu/crtools.c b/criu/crtools.c index e207133ac..4dc55a065 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -503,8 +503,8 @@ usage: " Inherit file descriptors, treating fd NUM as being\n" " already opened via an existing RES, which can be:\n" " tty[rdev:dev]\n" - " pipe[inode]\n" - " socket[inode]\n" + " pipe:[inode]\n" + " socket:[inode]\n" " file[mnt_id:inode]\n" " /memfd:name\n" " path/to/file\n" diff --git a/criu/files-ext.c b/criu/files-ext.c index 95ec8e37c..4cc99d921 100644 --- a/criu/files-ext.c +++ b/criu/files-ext.c @@ -45,10 +45,11 @@ static int open_fd(struct file_desc *d, int *new_fd) { struct ext_file_info *xfi; int fd; + bool retry_needed; xfi = container_of(d, struct ext_file_info, d); - fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id); + fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id, &retry_needed); if (fd < 0) { pr_err("Unable to restore %#x\n", xfi->xfe->id); return -1; @@ -57,8 +58,11 @@ static int open_fd(struct file_desc *d, int *new_fd) if (restore_fown(fd, xfi->xfe->fown)) return -1; - *new_fd = fd; - return 0; + if (!retry_needed) + *new_fd = fd; + else + *new_fd = -1; + return retry_needed; } static struct file_desc_ops ext_desc_ops = { diff --git a/criu/include/criu-plugin.h b/criu/include/criu-plugin.h index 9fb21a449..c3bea1385 100644 --- a/criu/include/criu-plugin.h +++ b/criu/include/criu-plugin.h @@ -62,6 +62,10 @@ enum { CR_PLUGIN_HOOK__POST_FORKING = 12, + CR_PLUGIN_HOOK__RESTORE_INIT = 13, + + CR_PLUGIN_HOOK__DUMP_DEVICES_LATE = 14, + CR_PLUGIN_HOOK__MAX }; @@ -70,7 +74,7 @@ enum { DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_UNIX_SK, int fd, int id); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_UNIX_SK, int id); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_FILE, int fd, int id); 
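+/*
+ * RESTORE_EXT_FILE gains an out-parameter: the plugin sets *retry_needed when
+ * the fd cannot be produced yet, so the caller (criu/files-ext.c) postpones
+ * installing it and retries the open later.
+ */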
-DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id); +DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id, bool *retry_needed); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_MOUNT, char *mountpoint, int id); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_MOUNT, int id, char *mountpoint, char *old_root, int *is_file); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_LINK, int index, int type, char *kind); @@ -81,6 +85,8 @@ DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, int pid); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__PAUSE_DEVICES, int pid); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__CHECKPOINT_DEVICES, int pid); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__POST_FORKING, void); +DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_INIT, void); +DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_DEVICES_LATE, int id); enum { CR_PLUGIN_STAGE__DUMP, diff --git a/criu/kerndat.c b/criu/kerndat.c index 997181ce7..2dc2f77d5 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -2116,6 +2116,7 @@ int kerndat_init(void) } if (!ret && kerndat_has_timer_cr_ids()) { pr_err("kerndat_has_timer_cr_ids has failed when initializing kerndat.\n"); + ret = -1; } if (!ret && kerndat_breakpoints()) { pr_err("kerndat_breakpoints has failed when initializing kerndat.\n"); diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 5c40b0e93..0a8aba41b 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1363,13 +1363,19 @@ __visible void __export_unmap(void) sys_munmap(bootstrap_start, bootstrap_len - vdso_rt_size); } -static void unregister_libc_rseq(struct rst_rseq_param *rseq) +static int unregister_libc_rseq(struct rst_rseq_param *rseq) { - if (!rseq->rseq_abi_pointer) - return; + long ret; - /* can't fail if rseq is registered */ - sys_rseq(decode_pointer(rseq->rseq_abi_pointer), rseq->rseq_abi_size, 1, rseq->signature); + if (!rseq->rseq_abi_pointer) + return 0; + + ret = sys_rseq(decode_pointer(rseq->rseq_abi_pointer), rseq->rseq_abi_size, 1, rseq->signature); + if (ret) { + pr_err("Failed to unregister libc rseq %ld\n", ret); + return -1; + } + return 0; } /* @@ -1803,7 +1809,8 @@ __visible long __export_restore_task(struct task_restore_args *args) * for instance once the kernel will want to update (struct rseq).cpu_id field: * https://github.com/torvalds/linux/blob/ce522ba9ef7e/kernel/rseq.c#L89 */ - unregister_libc_rseq(&args->libc_rseq); + if (unregister_libc_rseq(&args->libc_rseq)) + goto core_restore_end; if (unmap_old_vmas((void *)args->premmapped_addr, args->premmapped_len, bootstrap_start, bootstrap_len, args->task_size)) @@ -1989,6 +1996,9 @@ __visible long __export_restore_task(struct task_restore_args *args) for (m = 0; m < sizeof(vma_entry->madv) * 8; m++) { if (vma_entry->madv & (1ul << m)) { + if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR))) + continue; + ret = sys_madvise(vma_entry->start, vma_entry_len(vma_entry), m); if (ret) { pr_err("madvise(%" PRIx64 ", %" PRIu64 ", %ld) " diff --git a/criu/plugin.c b/criu/plugin.c index 18da0499d..f9322a3c2 100644 --- a/criu/plugin.c +++ b/criu/plugin.c @@ -60,6 +60,8 @@ static cr_plugin_desc_t *cr_gen_plugin_desc(void *h, char *path) __assign_hook(PAUSE_DEVICES, "cr_plugin_pause_devices"); __assign_hook(CHECKPOINT_DEVICES, "cr_plugin_checkpoint_devices"); __assign_hook(POST_FORKING, "cr_plugin_post_forking"); + __assign_hook(RESTORE_INIT, "cr_plugin_restore_init"); + __assign_hook(DUMP_DEVICES_LATE, "cr_plugin_dump_devices_late"); #undef __assign_hook @@ -257,8 +259,16 @@ int cr_plugin_init(int 
stage) goto err; } - if (stage == CR_PLUGIN_STAGE__RESTORE && check_inventory_plugins()) - goto err; + if (stage == CR_PLUGIN_STAGE__RESTORE) { + int ret; + + if (check_inventory_plugins()) + goto err; + + ret = run_plugins(RESTORE_INIT); + if (ret < 0 && ret != -ENOTSUP) + goto err; + } exit_code = 0; err: diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 0d3b5b23f..f51f2e801 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -1477,7 +1477,7 @@ static int parse_mountinfo_ent(char *str, struct mount_info *new, char **fsname) goto err; new->mountpoint[0] = '.'; - ret = sscanf(str, "%i %i %u:%u %ms %s %ms %n", &new->mnt_id, &new->parent_mnt_id, &kmaj, &kmin, &new->root, + ret = sscanf(str, "%i %i %u:%u %ms %4094s %ms %n", &new->mnt_id, &new->parent_mnt_id, &kmaj, &kmin, &new->root, new->mountpoint + 1, &opt, &n); if (ret != 7) goto err; @@ -2302,10 +2302,10 @@ static int parse_file_lock_buf(char *buf, struct file_lock *fl, bool is_blocked) char fl_flag[10], fl_type[15], fl_option[10]; if (is_blocked) { - num = sscanf(buf, "%lld: -> %s %s %s %d %x:%x:%ld %lld %s", &fl->fl_id, fl_flag, fl_type, fl_option, + num = sscanf(buf, "%lld: -> %9s %14s %9s %d %x:%x:%ld %lld %31s", &fl->fl_id, fl_flag, fl_type, fl_option, &fl->fl_owner, &fl->maj, &fl->min, &fl->i_no, &fl->start, fl->end); } else { - num = sscanf(buf, "%lld:%s %s %s %d %x:%x:%ld %lld %s", &fl->fl_id, fl_flag, fl_type, fl_option, + num = sscanf(buf, "%lld:%9s %14s %9s %d %x:%x:%ld %lld %31s", &fl->fl_id, fl_flag, fl_type, fl_option, &fl->fl_owner, &fl->maj, &fl->min, &fl->i_no, &fl->start, fl->end); } diff --git a/criu/servicefd.c b/criu/servicefd.c index 06a8d3eba..dfb019066 100644 --- a/criu/servicefd.c +++ b/criu/servicefd.c @@ -313,4 +313,4 @@ int clone_service_fd(struct pstree_item *me) ret = 0; return ret; -} +} \ No newline at end of file diff --git a/criu/tty.c b/criu/tty.c index ae23094b7..9a4520d53 100644 --- a/criu/tty.c +++ b/criu/tty.c @@ -259,7 +259,7 @@ static int pts_fd_get_index(int fd, const struct fd_parms *p) { int index; const struct fd_link *link = p->link; - char *pos = strrchr(link->name, '/'); + const char *pos = strrchr(link->name, '/'); if (!pos || pos == (link->name + link->len - 1)) { pr_err("Unexpected format on path %s\n", link->name + 1); diff --git a/criu/util.c b/criu/util.c index e2f80e4c6..2eaad35bb 100644 --- a/criu/util.c +++ b/criu/util.c @@ -222,10 +222,9 @@ int close_safe(int *fd) if (*fd > -1) { ret = close(*fd); - if (!ret) - *fd = -1; - else - pr_perror("Unable to close fd %d", *fd); + if (ret) + pr_perror("Failed closing fd %d", *fd); + *fd = -1; } return ret; diff --git a/images/core-aarch64.proto b/images/core-aarch64.proto index 64b0ee9fb..a94911c0b 100644 --- a/images/core-aarch64.proto +++ b/images/core-aarch64.proto @@ -17,6 +17,11 @@ message user_aarch64_fpsimd_context_entry { required uint32 fpcr = 3; } +message user_aarch64_gcs_entry { + required uint64 gcspr_el0 = 1 [(criu).hex = true]; + required uint64 features_enabled = 2 [(criu).hex = true]; +} + message pac_address_keys { required uint64 apiakey_lo = 1; required uint64 apiakey_hi = 2; @@ -45,4 +50,5 @@ message thread_info_aarch64 { required user_aarch64_regs_entry gpregs = 3[(criu).hex = true]; required user_aarch64_fpsimd_context_entry fpsimd = 4; optional pac_keys pac_keys = 5; + optional user_aarch64_gcs_entry gcs = 6; } diff --git a/lib/pycriu/criu.py b/lib/pycriu/criu.py index 760d2be78..51a5c2902 100644 --- a/lib/pycriu/criu.py +++ b/lib/pycriu/criu.py @@ -242,7 +242,7 @@ class criu: # process resources from 
its own if criu is located in a same # process tree it is trying to dump. daemon = False - if req.type == rpc.DUMP and not req.opts.HasField('pid'): + if req.type == rpc.DUMP and (not req.opts.HasField('pid') or req.opts.pid == os.getpid()): daemon = True try: diff --git a/lib/pycriu/images/pb2dict.py b/lib/pycriu/images/pb2dict.py index a35dd3c3f..f22887a52 100644 --- a/lib/pycriu/images/pb2dict.py +++ b/lib/pycriu/images/pb2dict.py @@ -154,8 +154,9 @@ flags_maps = { gen_maps = { 'task_state': { 1: 'Alive', - 3: 'Zombie', - 6: 'Stopped' + 2: 'Dead', + 3: 'Stopped', + 6: 'Zombie', }, } diff --git a/plugins/amdgpu/Makefile b/plugins/amdgpu/Makefile index 3d55f8bb4..250e7b0e7 100644 --- a/plugins/amdgpu/Makefile +++ b/plugins/amdgpu/Makefile @@ -27,8 +27,8 @@ endif criu-amdgpu.pb-c.c: criu-amdgpu.proto protoc --proto_path=. --c_out=. criu-amdgpu.proto -amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_drm.c amdgpu_plugin_topology.c amdgpu_plugin_util.c criu-amdgpu.pb-c.c - $(CC) $(PLUGIN_CFLAGS) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) $(LIBDRM_INC) +amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_drm.c amdgpu_plugin_dmabuf.c amdgpu_plugin_topology.c amdgpu_plugin_util.c criu-amdgpu.pb-c.c amdgpu_socket_utils.c + $(CC) $(PLUGIN_CFLAGS) $(DEFINES) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) $(LIBDRM_INC) amdgpu_plugin_clean: $(call msg-clean, $@) diff --git a/plugins/amdgpu/README.md b/plugins/amdgpu/README.md index 1078eafe6..b808fbc4f 100644 --- a/plugins/amdgpu/README.md +++ b/plugins/amdgpu/README.md @@ -3,7 +3,8 @@ Supporting ROCm with CRIU _Felix Kuehling _
_Rajneesh Bardwaj _
-_David Yat Sin _
+_David Yat Sin _<br/>
+_Yanning Yang _ # Introduction @@ -224,6 +225,26 @@ to resume execution on the GPUs. *This new plugin is enabled by the new hook `__RESUME_DEVICES_LATE` in our RFC patch series.* +## Restoring BO content in parallel + +Restoring the BO content is an important part in the restore of GPU state and +usually takes a significant amount of time. A possible location for this +procedure is the `cr_plugin_restore_file` hook. However, restoring in this hook +blocks the target process from performing other restore operations, which +hinders further optimization of the restore process. + +Therefore, a new plugin hook that runs in the master restore process is +introduced, and it interacts with the `cr_plugin_restore_file` hook to complete +the restore of BO content. Specifically, the target process only needs to send +the relevant BOs to the master restore process, while this new hook handles all +the restore of buffer objects. Through this method, during the restore of the BO +content, the target process can perform other restore operations, thus +accelerating the restore procedure. This is an implementation of the gCROP +method proposed in the ACM SoCC'24 paper: [On-demand and Parallel +Checkpoint/Restore for GPU Applications](https://dl.acm.org/doi/10.1145/3698038.3698510). + +*This optimization technique is enabled by the `__POST_FORKING` hook.* + ## Other CRIU changes In addition to the new plugins, we need to make some changes to CRIU itself to diff --git a/plugins/amdgpu/amdgpu_drm.h b/plugins/amdgpu/amdgpu_drm.h new file mode 100644 index 000000000..69227a12b --- /dev/null +++ b/plugins/amdgpu/amdgpu_drm.h @@ -0,0 +1,1801 @@ +/* amdgpu_drm.h -- Public header for the amdgpu driver -*- linux-c -*- + * + * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Fremont, California. + * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Kevin E. 
Martin + * Gareth Hughes + * Keith Whitwell + */ + +#ifndef __AMDGPU_DRM_H__ +#define __AMDGPU_DRM_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_AMDGPU_GEM_CREATE 0x00 +#define DRM_AMDGPU_GEM_MMAP 0x01 +#define DRM_AMDGPU_CTX 0x02 +#define DRM_AMDGPU_BO_LIST 0x03 +#define DRM_AMDGPU_CS 0x04 +#define DRM_AMDGPU_INFO 0x05 +#define DRM_AMDGPU_GEM_METADATA 0x06 +#define DRM_AMDGPU_GEM_WAIT_IDLE 0x07 +#define DRM_AMDGPU_GEM_VA 0x08 +#define DRM_AMDGPU_WAIT_CS 0x09 +#define DRM_AMDGPU_GEM_OP 0x10 +#define DRM_AMDGPU_GEM_USERPTR 0x11 +#define DRM_AMDGPU_WAIT_FENCES 0x12 +#define DRM_AMDGPU_VM 0x13 +#define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 +#define DRM_AMDGPU_SCHED 0x15 +#define DRM_AMDGPU_USERQ 0x16 +#define DRM_AMDGPU_USERQ_SIGNAL 0x17 +#define DRM_AMDGPU_USERQ_WAIT 0x18 +#define DRM_AMDGPU_GEM_LIST_HANDLES 0x19 +/* not upstream */ +#define DRM_AMDGPU_GEM_DGMA 0x5c + +/* hybrid specific ioctls */ +#define DRM_AMDGPU_SEM 0x5b + +#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) +#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) +#define DRM_IOCTL_AMDGPU_CTX DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CTX, union drm_amdgpu_ctx) +#define DRM_IOCTL_AMDGPU_BO_LIST DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_BO_LIST, union drm_amdgpu_bo_list) +#define DRM_IOCTL_AMDGPU_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CS, union drm_amdgpu_cs) +#define DRM_IOCTL_AMDGPU_INFO DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_INFO, struct drm_amdgpu_info) +#define DRM_IOCTL_AMDGPU_GEM_METADATA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, struct drm_amdgpu_gem_metadata) +#define DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_WAIT_IDLE, union drm_amdgpu_gem_wait_idle) +#define DRM_IOCTL_AMDGPU_GEM_VA DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, struct drm_amdgpu_gem_va) +#define DRM_IOCTL_AMDGPU_WAIT_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs) +#define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op) +#define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) +#define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) +#define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) +#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) +#define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) +#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal) +#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait) +#define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles) + +#define DRM_IOCTL_AMDGPU_GEM_DGMA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_DGMA, struct drm_amdgpu_gem_dgma) + +/** + * DOC: memory domains + * + * %AMDGPU_GEM_DOMAIN_CPU System memory that is not GPU accessible. + * Memory in this pool could be swapped out to disk if there is pressure. 
+ * + * %AMDGPU_GEM_DOMAIN_GTT GPU accessible system memory, mapped into the + * GPU's virtual address space via gart. Gart memory linearizes non-contiguous + * pages of system memory, allows GPU access system memory in a linearized + * fashion. + * + * %AMDGPU_GEM_DOMAIN_VRAM Local video memory. For APUs, it is memory + * carved out by the BIOS. + * + * %AMDGPU_GEM_DOMAIN_GDS Global on-chip data storage used to share data + * across shader threads. + * + * %AMDGPU_GEM_DOMAIN_GWS Global wave sync, used to synchronize the + * execution of all the waves on a device. + * + * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines + * for appending data. + * + * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for + * signalling user mode queues. + * + * %AMDGPU_GEM_DOMAIN_MMIO_REMAP MMIO remap page (special mapping for HDP flushing). + */ +/* hybrid specific ioctls */ +#define DRM_IOCTL_AMDGPU_SEM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem) + +#define AMDGPU_GEM_DOMAIN_CPU 0x1 +#define AMDGPU_GEM_DOMAIN_GTT 0x2 +#define AMDGPU_GEM_DOMAIN_VRAM 0x4 +#define AMDGPU_GEM_DOMAIN_GDS 0x8 +#define AMDGPU_GEM_DOMAIN_GWS 0x10 +#define AMDGPU_GEM_DOMAIN_OA 0x20 +#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 +#define AMDGPU_GEM_DOMAIN_MMIO_REMAP 0x80 +#define AMDGPU_GEM_DOMAIN_DGMA 0x400 +#define AMDGPU_GEM_DOMAIN_DGMA_IMPORT 0x800 + +#define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ + AMDGPU_GEM_DOMAIN_GTT | \ + AMDGPU_GEM_DOMAIN_VRAM | \ + AMDGPU_GEM_DOMAIN_GDS | \ + AMDGPU_GEM_DOMAIN_GWS | \ + AMDGPU_GEM_DOMAIN_OA |\ + AMDGPU_GEM_DOMAIN_DOORBELL |\ + AMDGPU_GEM_DOMAIN_MMIO_REMAP |\ + AMDGPU_GEM_DOMAIN_DGMA |\ + AMDGPU_GEM_DOMAIN_DGMA_IMPORT) + +/* Flag that CPU access will be required for the case of VRAM domain */ +#define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) +/* Flag that CPU access will not work, this VRAM domain is invisible */ +#define AMDGPU_GEM_CREATE_NO_CPU_ACCESS (1 << 1) +/* Flag that USWC attributes should be used for GTT */ +#define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2) +/* Flag that the memory should be in VRAM and cleared */ +#define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3) +/* Flag that allocating the BO should use linear VRAM */ +#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) +/* Flag that BO is always valid in this VM */ +#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) +/* Flag that BO sharing will be explicitly synchronized */ +#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7) +/* Flag that indicates allocating MQD gart on GFX9, where the mtype + * for the second page onward should be set to NC. It should never + * be used by user space applications. + */ +#define AMDGPU_GEM_CREATE_CP_MQD_GFX9 (1 << 8) +/* Flag that BO may contain sensitive data that must be wiped before + * releasing the memory + */ +#define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE (1 << 9) +/* Flag that BO will be encrypted and that the TMZ bit should be + * set in the PTEs when mapping this buffer via GPUVM or + * accessing it with various hw blocks + */ +#define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10) +/* Flag that BO will be used only in preemptible context, which does + * not require GTT memory accounting + */ +#define AMDGPU_GEM_CREATE_PREEMPTIBLE (1 << 11) +/* Flag that BO can be discarded under memory pressure without keeping the + * content. + */ +#define AMDGPU_GEM_CREATE_DISCARDABLE (1 << 12) +/* Flag that BO is shared coherently between multiple devices or CPU threads. + * May depend on GPU instructions to flush caches to system scope explicitly. 
+ * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_COHERENT (1 << 13) +/* Flag that BO should not be cached by GPU. Coherent without having to flush + * GPU caches explicitly + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_UNCACHED (1 << 14) +/* Flag that BO should be coherent across devices when using device-level + * atomics. May depend on GPU instructions to flush caches to device scope + * explicitly, promoting them to system scope automatically. + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_EXT_COHERENT (1 << 15) +/* Set PTE.D and recompress during GTT->VRAM moves according to TILING flags. */ +#define AMDGPU_GEM_CREATE_GFX12_DCC (1 << 16) + +/* hybrid specific */ +/* Flag that the memory should be in SPARSE resource */ +#define AMDGPU_GEM_CREATE_SPARSE (1ULL << 29) +/* Flag that the memory allocation should be from top of domain */ +#define AMDGPU_GEM_CREATE_TOP_DOWN (1ULL << 30) +/* Flag that the memory allocation should be pinned */ +#define AMDGPU_GEM_CREATE_NO_EVICT (1ULL << 31) + +struct drm_amdgpu_gem_create_in { + /** the requested memory size */ + __u64 bo_size; + /** physical start_addr alignment in bytes for some HW requirements */ + __u64 alignment; + /** the requested memory domains */ + __u64 domains; + /** allocation flags */ + __u64 domain_flags; +}; + +struct drm_amdgpu_gem_create_out { + /** returned GEM object handle */ + __u32 handle; + __u32 _pad; +}; + +union drm_amdgpu_gem_create { + struct drm_amdgpu_gem_create_in in; + struct drm_amdgpu_gem_create_out out; +}; + +/** Opcode to create new residency list. 
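+ *
+ * A sketch of a create request ('entries' and 'n' are hypothetical):
+ *
+ *   struct drm_amdgpu_bo_list_in in = {
+ *           .operation = AMDGPU_BO_LIST_OP_CREATE,
+ *           .bo_number = n,
+ *           .bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry),
+ *           .bo_info_ptr = (__u64)(uintptr_t)entries,
+ *   };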
+ */
+#define AMDGPU_BO_LIST_OP_CREATE	0
+/** Opcode to destroy previously created residency list */
+#define AMDGPU_BO_LIST_OP_DESTROY	1
+/** Opcode to update resource information in the list */
+#define AMDGPU_BO_LIST_OP_UPDATE	2
+
+struct drm_amdgpu_bo_list_in {
+	/** Type of operation */
+	__u32 operation;
+	/** Handle of list or 0 if we want to create one */
+	__u32 list_handle;
+	/** Number of BOs in list */
+	__u32 bo_number;
+	/** Size of each element describing BO */
+	__u32 bo_info_size;
+	/** Pointer to array describing BOs */
+	__u64 bo_info_ptr;
+};
+
+struct drm_amdgpu_bo_list_entry {
+	/** Handle of BO */
+	__u32 bo_handle;
+	/** New (if specified) BO priority to be used during migration */
+	__u32 bo_priority;
+};
+
+struct drm_amdgpu_bo_list_out {
+	/** Handle of resource list */
+	__u32 list_handle;
+	__u32 _pad;
+};
+
+union drm_amdgpu_bo_list {
+	struct drm_amdgpu_bo_list_in in;
+	struct drm_amdgpu_bo_list_out out;
+};
+
+/* context related */
+#define AMDGPU_CTX_OP_ALLOC_CTX	1
+#define AMDGPU_CTX_OP_FREE_CTX	2
+#define AMDGPU_CTX_OP_QUERY_STATE	3
+#define AMDGPU_CTX_OP_QUERY_STATE2	4
+#define AMDGPU_CTX_OP_GET_STABLE_PSTATE	5
+#define AMDGPU_CTX_OP_SET_STABLE_PSTATE	6
+
+/* GPU reset status */
+#define AMDGPU_CTX_NO_RESET		0
+/* this context caused it */
+#define AMDGPU_CTX_GUILTY_RESET		1
+/* some other context caused it */
+#define AMDGPU_CTX_INNOCENT_RESET	2
+/* unknown cause */
+#define AMDGPU_CTX_UNKNOWN_RESET	3
+
+/* indicate gpu reset occurred after ctx created */
+#define AMDGPU_CTX_QUERY2_FLAGS_RESET (1<<0)
+/* indicate vram lost occurred after ctx created */
+#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1)
+/* indicate some job from this context once caused a gpu hang */
+#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2)
+/* indicate some errors are detected by RAS */
+#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3)
+#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4)
+/* indicate that the reset hasn't completed yet */
+#define AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS (1<<5)
+
+/* Context priority level */
+#define AMDGPU_CTX_PRIORITY_UNSET       -2048
+#define AMDGPU_CTX_PRIORITY_VERY_LOW    -1023
+#define AMDGPU_CTX_PRIORITY_LOW         -512
+#define AMDGPU_CTX_PRIORITY_NORMAL      0
+/*
+ * When used in struct drm_amdgpu_ctx_in, a priority above NORMAL requires
+ * CAP_SYS_NICE or DRM_MASTER.
+ */
+#define AMDGPU_CTX_PRIORITY_HIGH        512
+#define AMDGPU_CTX_PRIORITY_VERY_HIGH   1023
+
+/* select a stable profiling pstate for perfmon tools */
+#define AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK  0xf
+#define AMDGPU_CTX_STABLE_PSTATE_NONE  0
+#define AMDGPU_CTX_STABLE_PSTATE_STANDARD  1
+#define AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK  2
+#define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK  3
+#define AMDGPU_CTX_STABLE_PSTATE_PEAK  4
+
+struct drm_amdgpu_ctx_in {
+	/** AMDGPU_CTX_OP_* */
+	__u32	op;
+	/** Flags */
+	__u32	flags;
+	__u32	ctx_id;
+	/** AMDGPU_CTX_PRIORITY_* */
+	__s32	priority;
+};
+
+union drm_amdgpu_ctx_out {
+		struct {
+			__u32	ctx_id;
+			__u32	_pad;
+		} alloc;
+
+		struct {
+			/** For future use, no flags defined so far */
+			__u64	flags;
+			/** Number of resets caused by this context so far. */
+			__u32	hangs;
+			/** Reset status since the last call of the ioctl.
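+			 *
+			 * A sketch of polling it (union drm_amdgpu_ctx below is
+			 * the DRM_AMDGPU_CTX ioctl argument; 'ctx_id' is a
+			 * hypothetical, previously allocated context id):
+			 *
+			 *   union drm_amdgpu_ctx c = {0};
+			 *   c.in.op = AMDGPU_CTX_OP_QUERY_STATE2;
+			 *   c.in.ctx_id = ctx_id;
+			 *   ... after the ioctl, test c.out.state.flags against
+			 *   the AMDGPU_CTX_QUERY2_FLAGS_* bits above ...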
+			 */
+			__u32	reset_status;
+		} state;
+
+		struct {
+			__u32	flags;
+			__u32	_pad;
+		} pstate;
+};
+
+union drm_amdgpu_ctx {
+	struct drm_amdgpu_ctx_in in;
+	union drm_amdgpu_ctx_out out;
+};
+
+/* user queue IOCTL operations */
+#define AMDGPU_USERQ_OP_CREATE	1
+#define AMDGPU_USERQ_OP_FREE	2
+
+/* queue priority levels */
+/* low < normal low < normal high < high */
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK	0x3
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT	0
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW	0
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW	1
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH	2
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH	3 /* admin only */
+/* for queues that need access to protected content */
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE	(1 << 2)
+
+/*
+ * This structure is a container to pass input configuration
+ * info for all supported userqueue related operations.
+ * For operation AMDGPU_USERQ_OP_CREATE: user is expected
+ * to set all fields, except the parameter 'queue_id'.
+ * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected
+ * to be set is 'queue_id', everything else is ignored.
+ */
+struct drm_amdgpu_userq_in {
+	/** AMDGPU_USERQ_OP_* */
+	__u32	op;
+	/** Queue id passed for operation USERQ_OP_FREE */
+	__u32	queue_id;
+	/** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */
+	__u32	ip_type;
+	/**
+	 * @doorbell_handle: the handle of doorbell GEM object
+	 * associated with this userqueue client.
+	 */
+	__u32	doorbell_handle;
+	/**
+	 * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo.
+	 * Kernel will generate absolute doorbell offset using doorbell_handle
+	 * and doorbell_offset in the doorbell bo.
+	 */
+	__u32	doorbell_offset;
+	/**
+	 * @flags: flags used for queue parameters
+	 */
+	__u32	flags;
+	/**
+	 * @queue_va: Virtual address of the GPU memory which holds the queue
+	 * object. The queue holds the workload packets.
+	 */
+	__u64	queue_va;
+	/**
+	 * @queue_size: Size of the queue in bytes, this needs to be 256-byte
+	 * aligned.
+	 */
+	__u64	queue_size;
+	/**
+	 * @rptr_va: Virtual address of the GPU memory which holds the ring RPTR.
+	 * This object must be at least 8 bytes in size and aligned to an
+	 * 8-byte offset.
+	 */
+	__u64	rptr_va;
+	/**
+	 * @wptr_va: Virtual address of the GPU memory which holds the ring WPTR.
+	 * This object must be at least 8 bytes in size and aligned to an
+	 * 8-byte offset.
+	 *
+	 * Queue, RPTR and WPTR can come from the same object, as long as the
+	 * size and alignment related requirements are met.
+	 */
+	__u64	wptr_va;
+	/**
+	 * @mqd: MQD (memory queue descriptor) is a set of parameters which allow
+	 * the GPU to uniquely define and identify a usermode queue.
+	 *
+	 * MQD data can be of different size for different GPU IP/engine and
+	 * their respective versions/revisions, so this points to a __u64 *
+	 * which holds IP specific MQD of this usermode queue.
+	 */
+	__u64	mqd;
+	/**
+	 * @mqd_size: size of MQD data in bytes, it must match the MQD structure
+	 * size of the respective engine/revision defined in UAPI, e.g., for
+	 * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11).
+	 */
+	__u64	mqd_size;
+};
+
+/* The structure to carry output of userqueue ops */
+struct drm_amdgpu_userq_out {
+	/**
+	 * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique
+	 * queue ID to represent the newly created userqueue in the system,
+	 * otherwise it should be ignored.
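+	 *
+	 * (For reference: a minimal AMDGPU_USERQ_OP_CREATE request fills
+	 * drm_amdgpu_userq_in above with op, ip_type, the doorbell handle and
+	 * offset, the queue/rptr/wptr virtual addresses, and an IP specific
+	 * MQD such as struct drm_amdgpu_userq_mqd_gfx11 defined below.)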
+ */ + __u32 queue_id; + __u32 _pad; +}; + +union drm_amdgpu_userq { + struct drm_amdgpu_userq_in in; + struct drm_amdgpu_userq_out out; +}; + +/* GFX V11 IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_gfx11 { + /** + * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 shadow_va; + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 csa_va; +}; + +/* GFX V11 SDMA IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_sdma_gfx11 { + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 csa_va; +}; + +/* GFX V11 Compute IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_compute_gfx11 { + /** + * @eop_va: Virtual address of the GPU memory to hold the EOP buffer. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 eop_va; +}; + +/* userq signal/wait ioctl */ +struct drm_amdgpu_userq_signal { + /** + * @queue_id: Queue handle used by the userq fence creation function + * to retrieve the WPTR. + */ + __u32 queue_id; + __u32 pad; + /** + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to be signaled. + */ + __u64 syncobj_handles; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles. + */ + __u64 num_syncobj_handles; + /** + * @bo_read_handles: The list of BO handles that the submitted user queue job + * is using for read only. This will update BO fences in the kernel. + */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of BO handles that the submitted user queue job + * is using for write only. This will update BO fences in the kernel. + */ + __u64 bo_write_handles; + /** + * @num_bo_read_handles: A count that represents the number of read BO handles in + * @bo_read_handles. + */ + __u32 num_bo_read_handles; + /** + * @num_bo_write_handles: A count that represents the number of write BO handles in + * @bo_write_handles. + */ + __u32 num_bo_write_handles; +}; + +struct drm_amdgpu_userq_fence_info { + /** + * @va: A gpu address allocated for each queue which stores the + * read pointer (RPTR) value. + */ + __u64 va; + /** + * @value: A 64 bit value represents the write pointer (WPTR) of the + * queue commands which compared with the RPTR value to signal the + * fences. + */ + __u64 value; +}; + +struct drm_amdgpu_userq_wait { + /** + * @waitq_id: Queue handle used by the userq wait IOCTL to retrieve the + * wait queue and maintain the fence driver references in it. + */ + __u32 waitq_id; + __u32 pad; + /** + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 syncobj_handles; + /** + * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by + * the user queue job to get the va/value pairs at given @syncobj_timeline_points. + */ + __u64 syncobj_timeline_handles; + /** + * @syncobj_timeline_points: The list of timeline syncobj points submitted by the + * user queue job for the corresponding @syncobj_timeline_handles. + */ + __u64 syncobj_timeline_points; + /** + * @bo_read_handles: The list of read BO handles submitted by the user queue + * job to get the va/value pairs. 
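+	 * Like the other handle arrays in this structure, it is passed as a
+	 * user pointer, e.g. (__u64)(uintptr_t)handles, with the matching
+	 * @num_bo_read_handles count below.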
+ */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of write BO handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 bo_write_handles; + /** + * @num_syncobj_timeline_handles: A count that represents the number of timeline + * syncobj handles in @syncobj_timeline_handles. + */ + __u16 num_syncobj_timeline_handles; + /** + * @num_fences: This field can be used both as input and output. As input it defines + * the maximum number of fences that can be returned and as output it will specify + * how many fences were actually returned from the ioctl. + */ + __u16 num_fences; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles. + */ + __u32 num_syncobj_handles; + /** + * @num_bo_read_handles: A count that represents the number of read BO handles in + * @bo_read_handles. + */ + __u32 num_bo_read_handles; + /** + * @num_bo_write_handles: A count that represents the number of write BO handles in + * @bo_write_handles. + */ + __u32 num_bo_write_handles; + /** + * @out_fences: The field is a return value from the ioctl containing the list of + * address/value pairs to wait for. + */ + __u64 out_fences; +}; + +/* sem related */ +#define AMDGPU_SEM_OP_CREATE_SEM 1 +#define AMDGPU_SEM_OP_WAIT_SEM 2 +#define AMDGPU_SEM_OP_SIGNAL_SEM 3 +#define AMDGPU_SEM_OP_DESTROY_SEM 4 +#define AMDGPU_SEM_OP_IMPORT_SEM 5 +#define AMDGPU_SEM_OP_EXPORT_SEM 6 + +struct drm_amdgpu_sem_in { + /** AMDGPU_SEM_OP_* */ + uint32_t op; + uint32_t handle; + uint32_t ctx_id; + uint32_t ip_type; + uint32_t ip_instance; + uint32_t ring; + uint64_t seq; +}; + +union drm_amdgpu_sem_out { + int32_t fd; + uint32_t handle; +}; + +union drm_amdgpu_sem { + struct drm_amdgpu_sem_in in; + union drm_amdgpu_sem_out out; +}; + +/* vm ioctl */ +#define AMDGPU_VM_OP_RESERVE_VMID 1 +#define AMDGPU_VM_OP_UNRESERVE_VMID 2 + +struct drm_amdgpu_vm_in { + /** AMDGPU_VM_OP_* */ + __u32 op; + __u32 flags; +}; + +struct drm_amdgpu_vm_out { + /** For future use, no flags defined so far */ + __u64 flags; +}; + +union drm_amdgpu_vm { + struct drm_amdgpu_vm_in in; + struct drm_amdgpu_vm_out out; +}; + +/* sched ioctl */ +#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1 +#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE 2 + +struct drm_amdgpu_sched_in { + /* AMDGPU_SCHED_OP_* */ + __u32 op; + __u32 fd; + /** AMDGPU_CTX_PRIORITY_* */ + __s32 priority; + __u32 ctx_id; +}; + +union drm_amdgpu_sched { + struct drm_amdgpu_sched_in in; +}; + +/* + * This is not a reliable API and you should expect it to fail for any + * number of reasons and have fallback path that do not use userptr to + * perform any operation. 
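+ *
+ * An illustrative registration of an anonymous, page-aligned range ('ptr'
+ * and 'len' are hypothetical):
+ *
+ *   struct drm_amdgpu_gem_userptr up = {
+ *           .addr = (__u64)(uintptr_t)ptr,
+ *           .size = len,
+ *           .flags = AMDGPU_GEM_USERPTR_ANONONLY | AMDGPU_GEM_USERPTR_VALIDATE,
+ *   };
+ *
+ * On success, up.handle names the resulting GEM object.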
+ */ +#define AMDGPU_GEM_USERPTR_READONLY (1 << 0) +#define AMDGPU_GEM_USERPTR_ANONONLY (1 << 1) +#define AMDGPU_GEM_USERPTR_VALIDATE (1 << 2) +#define AMDGPU_GEM_USERPTR_REGISTER (1 << 3) + +struct drm_amdgpu_gem_userptr { + __u64 addr; + __u64 size; + /* AMDGPU_GEM_USERPTR_* */ + __u32 flags; + /* Resulting GEM handle */ + __u32 handle; +}; + +#define AMDGPU_GEM_DGMA_IMPORT 0 +#define AMDGPU_GEM_DGMA_QUERY_PHYS_ADDR 1 +struct drm_amdgpu_gem_dgma { + __u64 addr; + __u64 size; + __u32 op; + __u32 handle; +}; + +/* SI-CI-VI: */ +/* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */ +#define AMDGPU_TILING_ARRAY_MODE_SHIFT 0 +#define AMDGPU_TILING_ARRAY_MODE_MASK 0xf +#define AMDGPU_TILING_PIPE_CONFIG_SHIFT 4 +#define AMDGPU_TILING_PIPE_CONFIG_MASK 0x1f +#define AMDGPU_TILING_TILE_SPLIT_SHIFT 9 +#define AMDGPU_TILING_TILE_SPLIT_MASK 0x7 +#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT 12 +#define AMDGPU_TILING_MICRO_TILE_MODE_MASK 0x7 +#define AMDGPU_TILING_BANK_WIDTH_SHIFT 15 +#define AMDGPU_TILING_BANK_WIDTH_MASK 0x3 +#define AMDGPU_TILING_BANK_HEIGHT_SHIFT 17 +#define AMDGPU_TILING_BANK_HEIGHT_MASK 0x3 +#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT 19 +#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK 0x3 +#define AMDGPU_TILING_NUM_BANKS_SHIFT 21 +#define AMDGPU_TILING_NUM_BANKS_MASK 0x3 + +/* GFX9 - GFX11: */ +#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f +#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT 5 +#define AMDGPU_TILING_DCC_OFFSET_256B_MASK 0xFFFFFF +#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT 29 +#define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF +#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 43 +#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1 +#define AMDGPU_TILING_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_SCANOUT_MASK 0x1 + +/* GFX12 and later: */ +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7 +/* These are DCC recompression settings for memory management: */ +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */ +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5 +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */ +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8 +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */ +/* When clearing the buffer or moving it from VRAM to GTT, don't compress and set DCC metadata + * to uncompressed. Set when parts of an allocation bypass DCC and read raw data. */ +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT 14 +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK 0x1 +/* bit gap */ +#define AMDGPU_TILING_GFX12_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_GFX12_SCANOUT_MASK 0x1 + +/* Set/Get helpers for tiling flags. 
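+ * e.g. (illustrative):
+ *
+ *   tiling = AMDGPU_TILING_SET(SWIZZLE_MODE, 3) | AMDGPU_TILING_SET(SCANOUT, 1);
+ *   swizzle = AMDGPU_TILING_GET(tiling, SWIZZLE_MODE);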
*/ +#define AMDGPU_TILING_SET(field, value) \ + (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT) +#define AMDGPU_TILING_GET(value, field) \ + (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK) + +#define AMDGPU_GEM_METADATA_OP_SET_METADATA 1 +#define AMDGPU_GEM_METADATA_OP_GET_METADATA 2 + +/** The same structure is shared for input/output */ +struct drm_amdgpu_gem_metadata { + /** GEM Object handle */ + __u32 handle; + /** Do we want get or set metadata */ + __u32 op; + struct { + /** For future use, no flags defined so far */ + __u64 flags; + /** family specific tiling info */ + __u64 tiling_info; + __u32 data_size_bytes; + __u32 data[64]; + } data; +}; + +struct drm_amdgpu_gem_mmap_in { + /** the GEM object handle */ + __u32 handle; + __u32 _pad; +}; + +struct drm_amdgpu_gem_mmap_out { + /** mmap offset from the vma offset manager */ + __u64 addr_ptr; +}; + +union drm_amdgpu_gem_mmap { + struct drm_amdgpu_gem_mmap_in in; + struct drm_amdgpu_gem_mmap_out out; +}; + +struct drm_amdgpu_gem_wait_idle_in { + /** GEM object handle */ + __u32 handle; + /** For future use, no flags defined so far */ + __u32 flags; + /** Absolute timeout to wait */ + __u64 timeout; +}; + +struct drm_amdgpu_gem_wait_idle_out { + /** BO status: 0 - BO is idle, 1 - BO is busy */ + __u32 status; + /** Returned current memory domain */ + __u32 domain; +}; + +union drm_amdgpu_gem_wait_idle { + struct drm_amdgpu_gem_wait_idle_in in; + struct drm_amdgpu_gem_wait_idle_out out; +}; + +struct drm_amdgpu_wait_cs_in { + /* Command submission handle + * handle equals 0 means none to wait for + * handle equals ~0ull means wait for the latest sequence number + */ + __u64 handle; + /** Absolute timeout to wait */ + __u64 timeout; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u32 ctx_id; +}; + +struct drm_amdgpu_wait_cs_out { + /** CS status: 0 - CS completed, 1 - CS still busy */ + __u64 status; +}; + +union drm_amdgpu_wait_cs { + struct drm_amdgpu_wait_cs_in in; + struct drm_amdgpu_wait_cs_out out; +}; + +struct drm_amdgpu_fence { + __u32 ctx_id; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u64 seq_no; +}; + +struct drm_amdgpu_wait_fences_in { + /** This points to uint64_t * which points to fences */ + __u64 fences; + __u32 fence_count; + __u32 wait_all; + __u64 timeout_ns; +}; + +struct drm_amdgpu_wait_fences_out { + __u32 status; + __u32 first_signaled; +}; + +union drm_amdgpu_wait_fences { + struct drm_amdgpu_wait_fences_in in; + struct drm_amdgpu_wait_fences_out out; +}; + +#define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0 +#define AMDGPU_GEM_OP_SET_PLACEMENT 1 +#define AMDGPU_GEM_OP_GET_MAPPING_INFO 2 + +struct drm_amdgpu_gem_vm_entry { + /* Start of mapping (in bytes) */ + __u64 addr; + + /* Size of mapping (in bytes) */ + __u64 size; + + /* Mapping offset */ + __u64 offset; + + /* flags needed to recreate mapping */ + __u64 flags; +}; + +/* Sets or returns a value associated with a buffer. */ +struct drm_amdgpu_gem_op { + /** GEM object handle */ + __u32 handle; + /** AMDGPU_GEM_OP_* */ + __u32 op; + /** Input or return value. 
For MAPPING_INFO op: pointer to array of struct drm_amdgpu_gem_vm_entry */ + __u64 value; + /** For MAPPING_INFO op: number of mappings (in/out) */ + __u32 num_entries; + + __u32 padding; +}; + +#define AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT (1 << 0) + +struct drm_amdgpu_gem_list_handles { + /* User pointer to array of drm_amdgpu_gem_bo_info_entry */ + __u64 entries; + + /* Size of entries buffer / Number of handles in process (if larger than size of buffer, must retry) */ + __u32 num_entries; + + __u32 padding; +}; + +struct drm_amdgpu_gem_list_handles_entry { + /* gem handle of buffer object */ + __u32 gem_handle; + + /* Currently just one flag: IS_IMPORT */ + __u32 flags; + + /* Size of bo */ + __u64 size; + + /* Preferred domains for GEM_CREATE */ + __u64 preferred_domains; + + /* GEM_CREATE flags for re-creation of buffer */ + __u64 alloc_flags; + + /* physical start_addr alignment in bytes for some HW requirements */ + __u64 alignment; +}; + +#define AMDGPU_VA_OP_MAP 1 +#define AMDGPU_VA_OP_UNMAP 2 +#define AMDGPU_VA_OP_CLEAR 3 +#define AMDGPU_VA_OP_REPLACE 4 + +/* Delay the page table update till the next CS */ +#define AMDGPU_VM_DELAY_UPDATE (1 << 0) + +/* Mapping flags */ +/* readable mapping */ +#define AMDGPU_VM_PAGE_READABLE (1 << 1) +/* writable mapping */ +#define AMDGPU_VM_PAGE_WRITEABLE (1 << 2) +/* executable mapping, new for VI */ +#define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3) +/* partially resident texture */ +#define AMDGPU_VM_PAGE_PRT (1 << 4) +/* MTYPE flags use bit 5 to 8 */ +#define AMDGPU_VM_MTYPE_MASK (0xf << 5) +/* Default MTYPE. Pre-AI must use this. Recommended for newer ASICs. */ +#define AMDGPU_VM_MTYPE_DEFAULT (0 << 5) +/* Use Non Coherent MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_NC (1 << 5) +/* Use Write Combine MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_WC (2 << 5) +/* Use Cache Coherent MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_CC (3 << 5) +/* Use UnCached MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_UC (4 << 5) +/* Use Read Write MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_RW (5 << 5) +/* don't allocate MALL */ +#define AMDGPU_VM_PAGE_NOALLOC (1 << 9) + +struct drm_amdgpu_gem_va { + /** GEM object handle */ + __u32 handle; + __u32 _pad; + /** AMDGPU_VA_OP_* */ + __u32 operation; + /** AMDGPU_VM_PAGE_* */ + __u32 flags; + /** va address to assign . Must be correctly aligned.*/ + __u64 va_address; + /** Specify offset inside of BO to assign. Must be correctly aligned.*/ + __u64 offset_in_bo; + /** Specify mapping size. Must be correctly aligned. */ + __u64 map_size; + /** + * vm_timeline_point is a sequence number used to add new timeline point. + */ + __u64 vm_timeline_point; + /** + * The vm page table update fence is installed in given vm_timeline_syncobj_out + * at vm_timeline_point. + */ + __u32 vm_timeline_syncobj_out; + /** the number of syncobj handles in @input_fence_syncobj_handles */ + __u32 num_syncobj_handles; + /** Array of sync object handle to wait for given input fences */ + __u64 input_fence_syncobj_handles; +}; + +#define AMDGPU_HW_IP_GFX 0 +#define AMDGPU_HW_IP_COMPUTE 1 +#define AMDGPU_HW_IP_DMA 2 +#define AMDGPU_HW_IP_UVD 3 +#define AMDGPU_HW_IP_VCE 4 +#define AMDGPU_HW_IP_UVD_ENC 5 +#define AMDGPU_HW_IP_VCN_DEC 6 +/* + * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support + * both encoding and decoding jobs. 
+ */ +#define AMDGPU_HW_IP_VCN_ENC 7 +#define AMDGPU_HW_IP_VCN_JPEG 8 +#define AMDGPU_HW_IP_VPE 9 +#define AMDGPU_HW_IP_NUM 10 + +#define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1 + +#define AMDGPU_CHUNK_ID_IB 0x01 +#define AMDGPU_CHUNK_ID_FENCE 0x02 +#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 +#define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04 +#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 +#define AMDGPU_CHUNK_ID_BO_HANDLES 0x06 +#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07 +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x08 +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x09 +#define AMDGPU_CHUNK_ID_CP_GFX_SHADOW 0x0a + +struct drm_amdgpu_cs_chunk { + __u32 chunk_id; + __u32 length_dw; + __u64 chunk_data; +}; + +struct drm_amdgpu_cs_in { + /** Rendering context id */ + __u32 ctx_id; + /** Handle of resource list associated with CS */ + __u32 bo_list_handle; + __u32 num_chunks; + __u32 flags; + /** this points to __u64 * which point to cs chunks */ + __u64 chunks; +}; + +struct drm_amdgpu_cs_out { + __u64 handle; +}; + +union drm_amdgpu_cs { + struct drm_amdgpu_cs_in in; + struct drm_amdgpu_cs_out out; +}; + +/* Specify flags to be used for IB */ + +/* This IB should be submitted to CE */ +#define AMDGPU_IB_FLAG_CE (1<<0) + +/* Preamble flag, which means the IB could be dropped if no context switch */ +#define AMDGPU_IB_FLAG_PREAMBLE (1<<1) + +/* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */ +#define AMDGPU_IB_FLAG_PREEMPT (1<<2) + +/* The IB fence should do the L2 writeback but not invalidate any shader + * caches (L2/vL1/sL1/I$). */ +#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) + +/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER. + * This will reset wave ID counters for the IB. + */ +#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) + +/* Flag the IB as secure (TMZ) + */ +#define AMDGPU_IB_FLAGS_SECURE (1 << 5) + +/* Tell KMD to flush and invalidate caches + */ +#define AMDGPU_IB_FLAG_EMIT_MEM_SYNC (1 << 6) + +struct drm_amdgpu_cs_chunk_ib { + __u32 _pad; + /** AMDGPU_IB_FLAG_* */ + __u32 flags; + /** Virtual address to begin IB execution */ + __u64 va_start; + /** Size of submission */ + __u32 ib_bytes; + /** HW IP to submit to */ + __u32 ip_type; + /** HW IP index of the same type to submit to */ + __u32 ip_instance; + /** Ring index to submit to */ + __u32 ring; +}; + +struct drm_amdgpu_cs_chunk_dep { + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u32 ctx_id; + __u64 handle; +}; + +struct drm_amdgpu_cs_chunk_fence { + __u32 handle; + __u32 offset; +}; + +struct drm_amdgpu_cs_chunk_sem { + __u32 handle; +}; + +struct drm_amdgpu_cs_chunk_syncobj { + __u32 handle; + __u32 flags; + __u64 point; +}; + +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0 +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD 1 +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD 2 + +union drm_amdgpu_fence_to_handle { + struct { + struct drm_amdgpu_fence fence; + __u32 what; + __u32 pad; + } in; + struct { + __u32 handle; + } out; +}; + +struct drm_amdgpu_cs_chunk_data { + union { + struct drm_amdgpu_cs_chunk_ib ib_data; + struct drm_amdgpu_cs_chunk_fence fence_data; + }; +}; + +#define AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW 0x1 + +struct drm_amdgpu_cs_chunk_cp_gfx_shadow { + __u64 shadow_va; + __u64 csa_va; + __u64 gds_va; + __u64 flags; +}; + +/* + * Query h/w info: Flag that this is integrated (a.h.a. 
fusion) GPU + * + */ +#define AMDGPU_IDS_FLAGS_FUSION 0x01 +#define AMDGPU_IDS_FLAGS_PREEMPTION 0x02 +#define AMDGPU_IDS_FLAGS_TMZ 0x04 +#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x08 +#define AMDGPU_IDS_FLAGS_GANG_SUBMIT 0x10 + +/* + * Query h/w info: Flag identifying VF/PF/PT mode + * + */ +#define AMDGPU_IDS_FLAGS_MODE_MASK 0x300 +#define AMDGPU_IDS_FLAGS_MODE_SHIFT 0x8 +#define AMDGPU_IDS_FLAGS_MODE_PF 0x0 +#define AMDGPU_IDS_FLAGS_MODE_VF 0x1 +#define AMDGPU_IDS_FLAGS_MODE_PT 0x2 + +/* indicate if acceleration can be working */ +#define AMDGPU_INFO_ACCEL_WORKING 0x00 +/* get the crtc_id from the mode object id? */ +#define AMDGPU_INFO_CRTC_FROM_ID 0x01 +/* query hw IP info */ +#define AMDGPU_INFO_HW_IP_INFO 0x02 +/* query hw IP instance count for the specified type */ +#define AMDGPU_INFO_HW_IP_COUNT 0x03 +/* timestamp for GL_ARB_timer_query */ +#define AMDGPU_INFO_TIMESTAMP 0x05 +/* Query the firmware version */ +#define AMDGPU_INFO_FW_VERSION 0x0e + /* Subquery id: Query VCE firmware version */ + #define AMDGPU_INFO_FW_VCE 0x1 + /* Subquery id: Query UVD firmware version */ + #define AMDGPU_INFO_FW_UVD 0x2 + /* Subquery id: Query GMC firmware version */ + #define AMDGPU_INFO_FW_GMC 0x03 + /* Subquery id: Query GFX ME firmware version */ + #define AMDGPU_INFO_FW_GFX_ME 0x04 + /* Subquery id: Query GFX PFP firmware version */ + #define AMDGPU_INFO_FW_GFX_PFP 0x05 + /* Subquery id: Query GFX CE firmware version */ + #define AMDGPU_INFO_FW_GFX_CE 0x06 + /* Subquery id: Query GFX RLC firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC 0x07 + /* Subquery id: Query GFX MEC firmware version */ + #define AMDGPU_INFO_FW_GFX_MEC 0x08 + /* Subquery id: Query SMC firmware version */ + #define AMDGPU_INFO_FW_SMC 0x0a + /* Subquery id: Query SDMA firmware version */ + #define AMDGPU_INFO_FW_SDMA 0x0b + /* Subquery id: Query PSP SOS firmware version */ + #define AMDGPU_INFO_FW_SOS 0x0c + /* Subquery id: Query PSP ASD firmware version */ + #define AMDGPU_INFO_FW_ASD 0x0d + /* Subquery id: Query VCN firmware version */ + #define AMDGPU_INFO_FW_VCN 0x0e + /* Subquery id: Query GFX RLC SRLC firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL 0x0f + /* Subquery id: Query GFX RLC SRLG firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM 0x10 + /* Subquery id: Query GFX RLC SRLS firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11 + /* Subquery id: Query DMCU firmware version */ + #define AMDGPU_INFO_FW_DMCU 0x12 + #define AMDGPU_INFO_FW_TA 0x13 + /* Subquery id: Query DMCUB firmware version */ + #define AMDGPU_INFO_FW_DMCUB 0x14 + /* Subquery id: Query TOC firmware version */ + #define AMDGPU_INFO_FW_TOC 0x15 + /* Subquery id: Query CAP firmware version */ + #define AMDGPU_INFO_FW_CAP 0x16 + /* Subquery id: Query GFX RLCP firmware version */ + #define AMDGPU_INFO_FW_GFX_RLCP 0x17 + /* Subquery id: Query GFX RLCV firmware version */ + #define AMDGPU_INFO_FW_GFX_RLCV 0x18 + /* Subquery id: Query MES_KIQ firmware version */ + #define AMDGPU_INFO_FW_MES_KIQ 0x19 + /* Subquery id: Query MES firmware version */ + #define AMDGPU_INFO_FW_MES 0x1a + /* Subquery id: Query IMU firmware version */ + #define AMDGPU_INFO_FW_IMU 0x1b + /* Subquery id: Query VPE firmware version */ + #define AMDGPU_INFO_FW_VPE 0x1c + +/* number of bytes moved for TTM migration */ +#define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f +/* the used VRAM size */ +#define AMDGPU_INFO_VRAM_USAGE 0x10 +/* the used GTT size */ +#define AMDGPU_INFO_GTT_USAGE 0x11 +/* Information about 
GDS, etc. resource configuration */ +#define AMDGPU_INFO_GDS_CONFIG 0x13 +/* Query information about VRAM and GTT domains */ +#define AMDGPU_INFO_VRAM_GTT 0x14 +/* Query information about register in MMR address space*/ +#define AMDGPU_INFO_READ_MMR_REG 0x15 +/* Query information about device: rev id, family, etc. */ +#define AMDGPU_INFO_DEV_INFO 0x16 +/* visible vram usage */ +#define AMDGPU_INFO_VIS_VRAM_USAGE 0x17 +/* number of TTM buffer evictions */ +#define AMDGPU_INFO_NUM_EVICTIONS 0x18 +/* Query memory about VRAM and GTT domains */ +#define AMDGPU_INFO_MEMORY 0x19 +/* Query vce clock table */ +#define AMDGPU_INFO_VCE_CLOCK_TABLE 0x1A +/* Query vbios related information */ +#define AMDGPU_INFO_VBIOS 0x1B + /* Subquery id: Query vbios size */ + #define AMDGPU_INFO_VBIOS_SIZE 0x1 + /* Subquery id: Query vbios image */ + #define AMDGPU_INFO_VBIOS_IMAGE 0x2 + /* Subquery id: Query vbios info */ + #define AMDGPU_INFO_VBIOS_INFO 0x3 +/* Query UVD handles */ +#define AMDGPU_INFO_NUM_HANDLES 0x1C +/* Query sensor related information */ +#define AMDGPU_INFO_SENSOR 0x1D + /* Subquery id: Query GPU shader clock */ + #define AMDGPU_INFO_SENSOR_GFX_SCLK 0x1 + /* Subquery id: Query GPU memory clock */ + #define AMDGPU_INFO_SENSOR_GFX_MCLK 0x2 + /* Subquery id: Query GPU temperature */ + #define AMDGPU_INFO_SENSOR_GPU_TEMP 0x3 + /* Subquery id: Query GPU load */ + #define AMDGPU_INFO_SENSOR_GPU_LOAD 0x4 + /* Subquery id: Query average GPU power */ + #define AMDGPU_INFO_SENSOR_GPU_AVG_POWER 0x5 + /* Subquery id: Query northbridge voltage */ + #define AMDGPU_INFO_SENSOR_VDDNB 0x6 + /* Subquery id: Query graphics voltage */ + #define AMDGPU_INFO_SENSOR_VDDGFX 0x7 + /* Subquery id: Query GPU stable pstate shader clock */ + #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK 0x8 + /* Subquery id: Query GPU stable pstate memory clock */ + #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK 0x9 + /* Subquery id: Query GPU peak pstate shader clock */ + #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK 0xa + /* Subquery id: Query GPU peak pstate memory clock */ + #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK 0xb + /* Subquery id: Query input GPU power */ + #define AMDGPU_INFO_SENSOR_GPU_INPUT_POWER 0xc +/* Number of VRAM page faults on CPU access. 
*/ +#define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E +#define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F +/* query ras mask of enabled features*/ +#define AMDGPU_INFO_RAS_ENABLED_FEATURES 0x20 +/* RAS MASK: UMC (VRAM) */ +#define AMDGPU_INFO_RAS_ENABLED_UMC (1 << 0) +/* RAS MASK: SDMA */ +#define AMDGPU_INFO_RAS_ENABLED_SDMA (1 << 1) +/* RAS MASK: GFX */ +#define AMDGPU_INFO_RAS_ENABLED_GFX (1 << 2) +/* RAS MASK: MMHUB */ +#define AMDGPU_INFO_RAS_ENABLED_MMHUB (1 << 3) +/* RAS MASK: ATHUB */ +#define AMDGPU_INFO_RAS_ENABLED_ATHUB (1 << 4) +/* RAS MASK: PCIE */ +#define AMDGPU_INFO_RAS_ENABLED_PCIE (1 << 5) +/* RAS MASK: HDP */ +#define AMDGPU_INFO_RAS_ENABLED_HDP (1 << 6) +/* RAS MASK: XGMI */ +#define AMDGPU_INFO_RAS_ENABLED_XGMI (1 << 7) +/* RAS MASK: DF */ +#define AMDGPU_INFO_RAS_ENABLED_DF (1 << 8) +/* RAS MASK: SMN */ +#define AMDGPU_INFO_RAS_ENABLED_SMN (1 << 9) +/* RAS MASK: SEM */ +#define AMDGPU_INFO_RAS_ENABLED_SEM (1 << 10) +/* RAS MASK: MP0 */ +#define AMDGPU_INFO_RAS_ENABLED_MP0 (1 << 11) +/* RAS MASK: MP1 */ +#define AMDGPU_INFO_RAS_ENABLED_MP1 (1 << 12) +/* RAS MASK: FUSE */ +#define AMDGPU_INFO_RAS_ENABLED_FUSE (1 << 13) +/* query video encode/decode caps */ +#define AMDGPU_INFO_VIDEO_CAPS 0x21 + /* Subquery id: Decode */ + #define AMDGPU_INFO_VIDEO_CAPS_DECODE 0 + /* Subquery id: Encode */ + #define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1 +/* Query the max number of IBs per gang per submission */ +#define AMDGPU_INFO_MAX_IBS 0x22 +/* query last page fault info */ +#define AMDGPU_INFO_GPUVM_FAULT 0x23 +/* query FW object size and alignment */ +#define AMDGPU_INFO_UQ_FW_AREAS 0x24 + +/* Hybrid Stack Specific Defs*/ +/* gpu capability */ +#define AMDGPU_INFO_CAPABILITY 0x50 +/* virtual range */ +#define AMDGPU_INFO_VIRTUAL_RANGE 0x51 +/* query pin memory capability */ +#define AMDGPU_CAPABILITY_PIN_MEM_FLAG (1 << 0) +/* query direct gma capability */ +#define AMDGPU_CAPABILITY_DIRECT_GMA_FLAG (1 << 1) + +#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 +#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff +#define AMDGPU_INFO_MMR_SH_INDEX_SHIFT 8 +#define AMDGPU_INFO_MMR_SH_INDEX_MASK 0xff + +struct drm_amdgpu_query_fw { + /** AMDGPU_INFO_FW_* */ + __u32 fw_type; + /** + * Index of the IP if there are more IPs of + * the same type. + */ + __u32 ip_instance; + /** + * Index of the engine. Whether this is used depends + * on the firmware type. (e.g. MEC, SDMA) + */ + __u32 index; + __u32 _pad; +}; + +/* Input structure for the INFO ioctl */ +struct drm_amdgpu_info { + /* Where the return value will be stored */ + __u64 return_pointer; + /* The size of the return value. Just like "size" in "snprintf", + * it limits how many bytes the kernel can write. */ + __u32 return_size; + /* The query request id. */ + __u32 query; + + union { + struct { + __u32 id; + __u32 _pad; + } mode_crtc; + + struct { + /** AMDGPU_HW_IP_* */ + __u32 type; + /** + * Index of the IP if there are more IPs of the same + * type. Ignored by AMDGPU_INFO_HW_IP_COUNT. 
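+		 *
+		 * An illustrative AMDGPU_INFO_HW_IP_INFO query (a sketch,
+		 * error handling omitted):
+		 *
+		 *   struct drm_amdgpu_info_hw_ip ip = {0};
+		 *   struct drm_amdgpu_info req = {0};
+		 *   req.return_pointer = (__u64)(uintptr_t)&ip;
+		 *   req.return_size = sizeof(ip);
+		 *   req.query = AMDGPU_INFO_HW_IP_INFO;
+		 *   req.query_hw_ip.type = AMDGPU_HW_IP_GFX;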
+ */ + __u32 ip_instance; + } query_hw_ip; + + struct { + __u32 dword_offset; + /** number of registers to read */ + __u32 count; + __u32 instance; + /** For future use, no flags defined so far */ + __u32 flags; + } read_mmr_reg; + + struct { + uint32_t aperture; + uint32_t _pad; + } virtual_range; + + struct drm_amdgpu_query_fw query_fw; + + struct { + __u32 type; + __u32 offset; + } vbios_info; + + struct { + __u32 type; + } sensor_info; + + struct { + __u32 type; + } video_cap; + }; +}; + +struct drm_amdgpu_info_gds { + /** GDS GFX partition size */ + __u32 gds_gfx_partition_size; + /** GDS compute partition size */ + __u32 compute_partition_size; + /** total GDS memory size */ + __u32 gds_total_size; + /** GWS size per GFX partition */ + __u32 gws_per_gfx_partition; + /** GSW size per compute partition */ + __u32 gws_per_compute_partition; + /** OA size per GFX partition */ + __u32 oa_per_gfx_partition; + /** OA size per compute partition */ + __u32 oa_per_compute_partition; + __u32 _pad; +}; + +struct drm_amdgpu_info_vram_gtt { + __u64 vram_size; + __u64 vram_cpu_accessible_size; + __u64 gtt_size; +}; + +struct drm_amdgpu_heap_info { + /** max. physical memory */ + __u64 total_heap_size; + + /** Theoretical max. available memory in the given heap */ + __u64 usable_heap_size; + + /** + * Number of bytes allocated in the heap. This includes all processes + * and private allocations in the kernel. It changes when new buffers + * are allocated, freed, and moved. It cannot be larger than + * heap_size. + */ + __u64 heap_usage; + + /** + * Theoretical possible max. size of buffer which + * could be allocated in the given heap + */ + __u64 max_allocation; +}; + +struct drm_amdgpu_memory_info { + struct drm_amdgpu_heap_info vram; + struct drm_amdgpu_heap_info cpu_accessible_vram; + struct drm_amdgpu_heap_info gtt; +}; + +struct drm_amdgpu_info_firmware { + __u32 ver; + __u32 feature; +}; + +struct drm_amdgpu_info_vbios { + __u8 name[64]; + __u8 vbios_pn[64]; + __u32 version; + __u32 pad; + __u8 vbios_ver_str[32]; + __u8 date[32]; +}; + +#define AMDGPU_VRAM_TYPE_UNKNOWN 0 +#define AMDGPU_VRAM_TYPE_GDDR1 1 +#define AMDGPU_VRAM_TYPE_DDR2 2 +#define AMDGPU_VRAM_TYPE_GDDR3 3 +#define AMDGPU_VRAM_TYPE_GDDR4 4 +#define AMDGPU_VRAM_TYPE_GDDR5 5 +#define AMDGPU_VRAM_TYPE_HBM 6 +#define AMDGPU_VRAM_TYPE_DDR3 7 +#define AMDGPU_VRAM_TYPE_DDR4 8 +#define AMDGPU_VRAM_TYPE_GDDR6 9 +#define AMDGPU_VRAM_TYPE_DDR5 10 +#define AMDGPU_VRAM_TYPE_LPDDR4 11 +#define AMDGPU_VRAM_TYPE_LPDDR5 12 +#define AMDGPU_VRAM_TYPE_HBM3E 13 + +#define AMDGPU_VRAM_TYPE_HBM_WIDTH 4096 + +struct drm_amdgpu_info_device { + /** PCI Device ID */ + __u32 device_id; + /** Internal chip revision: A0, A1, etc.) */ + __u32 chip_rev; + __u32 external_rev; + /** Revision id in PCI Config space */ + __u32 pci_rev; + __u32 family; + __u32 num_shader_engines; + __u32 num_shader_arrays_per_engine; + /* in KHz */ + __u32 gpu_counter_freq; + __u64 max_engine_clock; + __u64 max_memory_clock; + /* cu information */ + __u32 cu_active_number; + /* NOTE: cu_ao_mask is INVALID, DON'T use it */ + __u32 cu_ao_mask; + __u32 cu_bitmap[4][4]; + /** Render backend pipe mask. One render backend is CB+DB. */ + __u32 enabled_rb_pipes_mask; + __u32 num_rb_pipes; + __u32 num_hw_gfx_contexts; + /* PCIe version (the smaller of the GPU and the CPU/motherboard) */ + __u32 pcie_gen; + __u64 ids_flags; + /** Starting virtual address for UMDs. 
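+	 * That is, the lowest GPU virtual address a user-mode driver may use;
+	 * together with virtual_address_max below it bounds the usable range.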
*/ + __u64 virtual_address_offset; + /** The maximum virtual address */ + __u64 virtual_address_max; + /** Required alignment of virtual addresses. */ + __u32 virtual_address_alignment; + /** Page table entry - fragment size */ + __u32 pte_fragment_size; + __u32 gart_page_size; + /** constant engine ram size*/ + __u32 ce_ram_size; + /** video memory type info*/ + __u32 vram_type; + /** video memory bit width*/ + __u32 vram_bit_width; + /* vce harvesting instance */ + __u32 vce_harvest_config; + /* gfx double offchip LDS buffers */ + __u32 gc_double_offchip_lds_buf; + /* NGG Primitive Buffer */ + __u64 prim_buf_gpu_addr; + /* NGG Position Buffer */ + __u64 pos_buf_gpu_addr; + /* NGG Control Sideband */ + __u64 cntl_sb_buf_gpu_addr; + /* NGG Parameter Cache */ + __u64 param_buf_gpu_addr; + __u32 prim_buf_size; + __u32 pos_buf_size; + __u32 cntl_sb_buf_size; + __u32 param_buf_size; + /* wavefront size*/ + __u32 wave_front_size; + /* shader visible vgprs*/ + __u32 num_shader_visible_vgprs; + /* CU per shader array*/ + __u32 num_cu_per_sh; + /* number of tcc blocks*/ + __u32 num_tcc_blocks; + /* gs vgt table depth*/ + __u32 gs_vgt_table_depth; + /* gs primitive buffer depth*/ + __u32 gs_prim_buffer_depth; + /* max gs wavefront per vgt*/ + __u32 max_gs_waves_per_vgt; + /* PCIe number of lanes (the smaller of the GPU and the CPU/motherboard) */ + __u32 pcie_num_lanes; + /* always on cu bitmap */ + __u32 cu_ao_bitmap[4][4]; + /** Starting high virtual address for UMDs. */ + __u64 high_va_offset; + /** The maximum high virtual address */ + __u64 high_va_max; + /* gfx10 pa_sc_tile_steering_override */ + __u32 pa_sc_tile_steering_override; + /* disabled TCCs */ + __u64 tcc_disabled_mask; + __u64 min_engine_clock; + __u64 min_memory_clock; + /* The following fields are only set on gfx11+, older chips set 0. */ + __u32 tcp_cache_size; /* AKA GL0, VMEM cache */ + __u32 num_sqc_per_wgp; + __u32 sqc_data_cache_size; /* AKA SMEM cache */ + __u32 sqc_inst_cache_size; + __u32 gl1c_cache_size; + __u32 gl2c_cache_size; + __u64 mall_size; /* AKA infinity cache */ + /* high 32 bits of the rb pipes mask */ + __u32 enabled_rb_pipes_mask_hi; + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; + /* Userq IP mask (1 << AMDGPU_HW_IP_*) */ + __u32 userq_ip_mask; + __u32 pad; +}; + +struct drm_amdgpu_info_hw_ip { + /** Version of h/w IP */ + __u32 hw_ip_version_major; + __u32 hw_ip_version_minor; + /** Capabilities */ + __u64 capabilities_flags; + /** command buffer address start alignment*/ + __u32 ib_start_alignment; + /** command buffer size alignment*/ + __u32 ib_size_alignment; + /** Bitmask of available rings. Bit 0 means ring 0, etc. 
*/ + __u32 available_rings; + /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ + __u32 ip_discovery_version; + /* Userq available slots */ + __u32 userq_num_slots; +}; + +/* GFX metadata BO sizes and alignment info (in bytes) */ +struct drm_amdgpu_info_uq_fw_areas_gfx { + /* shadow area size */ + __u32 shadow_size; + /* shadow area base virtual mem alignment */ + __u32 shadow_alignment; + /* context save area size */ + __u32 csa_size; + /* context save area base virtual mem alignment */ + __u32 csa_alignment; +}; + +/* IP specific fw related information used in the + * subquery AMDGPU_INFO_UQ_FW_AREAS + */ +struct drm_amdgpu_info_uq_fw_areas { + union { + struct drm_amdgpu_info_uq_fw_areas_gfx gfx; + }; +}; + +struct drm_amdgpu_info_num_handles { + /** Max handles as supported by firmware for UVD */ + __u32 uvd_max_handles; + /** Handles currently in use for UVD */ + __u32 uvd_used_handles; +}; + +#define AMDGPU_VCE_CLOCK_TABLE_ENTRIES 6 + +struct drm_amdgpu_info_vce_clock_table_entry { + /** System clock */ + __u32 sclk; + /** Memory clock */ + __u32 mclk; + /** VCE clock */ + __u32 eclk; + __u32 pad; +}; + +struct drm_amdgpu_info_vce_clock_table { + struct drm_amdgpu_info_vce_clock_table_entry entries[AMDGPU_VCE_CLOCK_TABLE_ENTRIES]; + __u32 num_valid_entries; + __u32 pad; +}; + +/* query video encode/decode caps */ +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2 0 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4 1 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1 2 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC 3 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC 4 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG 5 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9 6 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1 7 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT 8 + +struct drm_amdgpu_info_video_codec_info { + __u32 valid; + __u32 max_width; + __u32 max_height; + __u32 max_pixels_per_frame; + __u32 max_level; + __u32 pad; +}; + +struct drm_amdgpu_info_video_caps { + struct drm_amdgpu_info_video_codec_info codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT]; +}; + +#define AMDGPU_VMHUB_TYPE_MASK 0xff +#define AMDGPU_VMHUB_TYPE_SHIFT 0 +#define AMDGPU_VMHUB_TYPE_GFX 0 +#define AMDGPU_VMHUB_TYPE_MM0 1 +#define AMDGPU_VMHUB_TYPE_MM1 2 +#define AMDGPU_VMHUB_IDX_MASK 0xff00 +#define AMDGPU_VMHUB_IDX_SHIFT 8 + +struct drm_amdgpu_info_gpuvm_fault { + __u64 addr; + __u32 status; + __u32 vmhub; +}; + +struct drm_amdgpu_info_uq_metadata_gfx { + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; +}; + +struct drm_amdgpu_info_uq_metadata { + union { + struct drm_amdgpu_info_uq_metadata_gfx gfx; + }; +}; + +/* + * Supported GPU families + */ +#define AMDGPU_FAMILY_UNKNOWN 0 +#define AMDGPU_FAMILY_SI 110 /* Hainan, Oland, Verde, Pitcairn, Tahiti */ +#define AMDGPU_FAMILY_CI 120 /* Bonaire, Hawaii */ +#define AMDGPU_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */ +#define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */ +#define AMDGPU_FAMILY_CZ 135 /* Carrizo, Stoney */ +#define AMDGPU_FAMILY_AI 141 /* Vega10 */ +#define AMDGPU_FAMILY_RV 142 /* Raven */ +#define AMDGPU_FAMILY_NV 143 /* Navi10 */ +#define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ +#define AMDGPU_FAMILY_GC_11_0_0 145 /* GC 11.0.0 */ +#define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ +#define AMDGPU_FAMILY_GC_11_0_1 148 /* GC 11.0.1 */ 
+#define AMDGPU_FAMILY_GC_10_3_6			149 /* GC 10.3.6 */
+#define AMDGPU_FAMILY_GC_10_3_7			151 /* GC 10.3.7 */
+#define AMDGPU_FAMILY_GC_11_5_0			150 /* GC 11.5.0 */
+#define AMDGPU_FAMILY_GC_12_0_0			152 /* GC 12.0.0 */
+
+#ifndef HAVE_DRM_COLOR_CTM_3X4
+/* FIXME wrong namespace! */
+struct drm_color_ctm_3x4 {
+	/*
+	 * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude
+	 * (not two's complement!) format.
+	 */
+	__u64 matrix[12];
+};
+#endif
+
+/**
+ * Definition of System Unified Address (SUA) apertures
+ */
+#define AMDGPU_SUA_APERTURE_PRIVATE	1
+#define AMDGPU_SUA_APERTURE_SHARED	2
+struct drm_amdgpu_virtual_range {
+	uint64_t start;
+	uint64_t end;
+};
+
+struct drm_amdgpu_capability {
+	__u32 flag;
+	__u32 direct_gma_size;
+};
+
+/*
+ * Definition of free sync enter and exit signals
+ * We may have more options in the future
+ */
+#define AMDGPU_FREESYNC_FULLSCREEN_ENTER	1
+#define AMDGPU_FREESYNC_FULLSCREEN_EXIT		2
+
+struct drm_amdgpu_freesync {
+	__u32 op;			/* AMDGPU_FREESYNC_FULLSCREEN_ENTER or */
+					/* AMDGPU_FREESYNC_FULLSCREEN_EXIT */
+	__u32 spare[7];
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c
index 96c086162..ee55bde0a 100644
--- a/plugins/amdgpu/amdgpu_plugin.c
+++ b/plugins/amdgpu/amdgpu_plugin.c
@@ -12,31 +12,41 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
 #include
 #include
-#include
 #include "criu-plugin.h"
 #include "plugin.h"
 #include "criu-amdgpu.pb-c.h"
+#include "util.h"
+#include "util-pie.h"
+#include "fdstore.h"
 #include "kfd_ioctl.h"
 #include "xmalloc.h"
 #include "criu-log.h"
 #include "files.h"
+#include "pstree.h"
+#include "sockets.h"
+#include "rst-malloc.h"
 #include "common/list.h"
+#include "amdgpu_drm.h"
+#include "amdgpu_plugin_dmabuf.h"
 #include "amdgpu_plugin_drm.h"
 #include "amdgpu_plugin_util.h"
 #include "amdgpu_plugin_topology.h"
+#include "amdgpu_socket_utils.h"
 #include "img-streamer.h"
 #include "image.h"
 #include "cr_options.h"
 
 struct vma_metadata {
 	struct list_head list;
@@ -49,13 +60,6 @@ struct vma_metadata {
 
 /************************************ Global Variables ********************************************/
 
-/**
- * FD of KFD device used to checkpoint. On a multi-process
- * tree the order of checkpointing goes from parent to child
- * and so on - so saving the FD will not be overwritten
- */
-static int kfd_checkpoint_fd;
-
 static LIST_HEAD(update_vma_info_list);
 
 size_t kfd_max_buffer_size;
@@ -64,6 +68,31 @@ bool plugin_added_to_inventory = false;
 
 bool plugin_disabled = false;
 
+struct handle_id {
+	int handle;
+	int fdstore_id;
+};
+struct shared_handle_ids {
+	int num_handles;
+	struct handle_id *handles;
+};
+struct shared_handle_ids *shared_memory = NULL;
+
+static mutex_t *shared_memory_mutex;
+
+int current_pid;
+/*
+ * In the common case of a single process, parallel restore effectively
+ * reduces restore latency. With multiple processes, state is already
+ * restored in parallel within the different processes, so this
+ * optimization brings no further improvement and is disabled by default
+ * in that case. The flag parallel_disabled controls whether the
+ * optimization is enabled.
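+ *
+ * (See amdgpu_plugin_init() below: parallel restore is disabled as soon as
+ * the restored process tree has children.)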
+ */ +bool parallel_disabled = false; + +pthread_t parallel_thread = 0; +int parallel_thread_result = 0; /**************************************************************************************************/ /* Call ioctl, restarting if it is interrupted */ @@ -299,8 +328,6 @@ void getenv_size_t(const char *var, size_t *value) int sh = 0; size_t size; - pr_info("Value str: %s\n", value_str); - if (value_str) { size = (size_t)strtoul(value_str, &endp, 0); if (errno || value_str == endp) { @@ -351,6 +378,15 @@ int amdgpu_plugin_init(int stage) maps_init(&restore_maps); if (stage == CR_PLUGIN_STAGE__RESTORE) { + if (has_children(root_item)) { + pr_info("Parallel restore disabled\n"); + parallel_disabled = true; + } else { + if (install_parallel_sock() < 0) { + pr_err("Failed to install parallel socket\n"); + return -1; + } + } /* Default Values */ kfd_fw_version_check = true; kfd_sdma_fw_version_check = true; @@ -503,11 +539,11 @@ void free_and_unmap(uint64_t size, amdgpu_bo_handle h_bo, amdgpu_va_handle h_va, amdgpu_bo_free(h_bo); } -static int sdma_copy_bo(struct kfd_criu_bo_bucket bo_bucket, FILE *storage_fp, - void *buffer, size_t buffer_size, amdgpu_device_handle h_dev, - uint64_t max_copy_size, enum sdma_op_type type) +int sdma_copy_bo(int shared_fd, uint64_t size, FILE *storage_fp, + void *buffer, size_t buffer_size, amdgpu_device_handle h_dev, + uint64_t max_copy_size, enum sdma_op_type type, bool do_not_free) { - uint64_t size, src_bo_size, dst_bo_size, buffer_bo_size, bytes_remain, buffer_space_remain; + uint64_t src_bo_size, dst_bo_size, buffer_bo_size, bytes_remain, buffer_space_remain; uint64_t gpu_addr_src, gpu_addr_dst, gpu_addr_ib, copy_src, copy_dst, copy_size; amdgpu_va_handle h_va_src, h_va_dst, h_va_ib; amdgpu_bo_handle h_bo_src, h_bo_dst, h_bo_ib; @@ -520,10 +556,8 @@ static int sdma_copy_bo(struct kfd_criu_bo_bucket bo_bucket, FILE *storage_fp, uint32_t expired; amdgpu_context_handle h_ctx; uint32_t *ib = NULL; - int j, err, shared_fd, packets_per_buffer; + int j, err, packets_per_buffer; - shared_fd = bo_bucket.dmabuf_fd; - size = bo_bucket.size; buffer_bo_size = min(size, buffer_size); packets_per_buffer = ((buffer_bo_size - 1) / max_copy_size) + 1; src_bo_size = (type == SDMA_OP_VRAM_WRITE) ? 
buffer_bo_size : size; @@ -734,7 +768,8 @@ err_dst_bo_map: if (err) pr_perror("dest range free failed"); err_dst_va: - err = amdgpu_bo_free(h_bo_dst); + if (!do_not_free) + err = amdgpu_bo_free(h_bo_dst); if (err) pr_perror("dest bo free failed"); err_dst_bo_prep: @@ -822,8 +857,9 @@ void *dump_bo_contents(void *_thread_data) num_bos++; /* perform sDMA based vram copy */ - ret = sdma_copy_bo(bo_buckets[i], bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, - SDMA_OP_VRAM_READ); + ret = sdma_copy_bo(bo_buckets[i].dmabuf_fd, bo_buckets[i].size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, + SDMA_OP_VRAM_READ, false); + if (ret) { pr_err("Failed to drain the BO using sDMA: bo_buckets[%d]\n", i); break; @@ -920,8 +956,8 @@ void *restore_bo_contents(void *_thread_data) num_bos++; - ret = sdma_copy_bo(bo_buckets[i], bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, - SDMA_OP_VRAM_WRITE); + ret = sdma_copy_bo(bo_buckets[i].dmabuf_fd, bo_buckets[i].size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, + SDMA_OP_VRAM_WRITE, false); if (ret) { pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i); break; @@ -1007,28 +1043,163 @@ int restore_hsakmt_shared_mem(const uint64_t shared_mem_size, const uint32_t sha return 0; } -static int unpause_process(int fd) +int amdgpu_unpause_processes(int pid) { int ret = 0; struct kfd_ioctl_criu_args args = { 0 }; + struct list_head *l = get_dumped_fds(); + struct dumped_fd *st; - args.op = KFD_CRIU_OP_UNPAUSE; + list_for_each_entry(st, l, l) { + if (st->is_drm) { + close(st->fd); + } else { + args.op = KFD_CRIU_OP_UNPAUSE; - ret = kmtIoctl(fd, AMDKFD_IOC_CRIU_OP, &args); - if (ret) { - pr_perror("Failed to unpause process"); - goto exit; + ret = kmtIoctl(st->fd, AMDKFD_IOC_CRIU_OP, &args); + if (ret) { + pr_perror("Failed to unpause process"); + goto exit; + } + } } - // Reset the KFD FD - kfd_checkpoint_fd = -1; - sys_close_drm_render_devices(&src_topology); + if (post_dump_dmabuf_check() < 0) + ret = -1; exit: pr_info("Process unpaused %s (ret:%d)\n", ret ? 
"Failed" : "Ok", ret); + clear_dumped_fds(); return ret; } +CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__DUMP_DEVICES_LATE, amdgpu_unpause_processes) + +int store_dmabuf_fd(int handle, int fd) +{ + int id; + + id = fdstore_add(fd); + mutex_lock(shared_memory_mutex); + for (int i = 0; i < shared_memory->num_handles; i++) { + if (shared_memory->handles[i].handle == handle) { + mutex_unlock(shared_memory_mutex); + return 0; + } + if (shared_memory->handles[i].handle == -1) { + shared_memory->handles[i].handle = handle; + shared_memory->handles[i].fdstore_id = id; + mutex_unlock(shared_memory_mutex); + return 0; + } + } + mutex_unlock(shared_memory_mutex); + + return -1; +} + +int amdgpu_id_for_handle(int handle) +{ + mutex_lock(shared_memory_mutex); + for (int i = 0; i < shared_memory->num_handles; i++) { + if (shared_memory->handles[i].handle == handle) { + mutex_unlock(shared_memory_mutex); + return shared_memory->handles[i].fdstore_id; + } + } + mutex_unlock(shared_memory_mutex); + return -1; +} + +int amdgpu_restore_init(void) +{ + if (!shared_memory) { + int protection = PROT_READ | PROT_WRITE; + int visibility = MAP_SHARED | MAP_ANONYMOUS; + size_t img_size; + FILE *img_fp = NULL; + int ret; + unsigned char *buf; + int num_handles = 0; + char img_path[PATH_MAX]; + CriuRenderNode *rd = NULL; + CriuKfd *e = NULL; + + DIR *d; + struct dirent *dir; + d = opendir("."); + if (d) { + while ((dir = readdir(d)) != NULL) { + if (strncmp("amdgpu-kfd-", dir->d_name, strlen("amdgpu-kfd-")) == 0) { + img_fp = open_img_file(dir->d_name, false, &img_size); + buf = xmalloc(img_size); + if (!buf) { + fclose(img_fp); + return -ENOMEM; + } + + ret = read_fp(img_fp, buf, img_size); + if (ret) { + pr_perror("Unable to read from %s", img_path); + fclose(img_fp); + xfree(buf); + return ret; + } + + fclose(img_fp); + e = criu_kfd__unpack(NULL, img_size, buf); + num_handles += e->num_of_bos; + criu_kfd__free_unpacked(e, NULL); + xfree(buf); + } + if (strncmp("amdgpu-renderD-", dir->d_name, strlen("amdgpu-renderD-")) == 0) { + img_fp = open_img_file(dir->d_name, false, &img_size); + buf = xmalloc(img_size); + if (!buf) { + fclose(img_fp); + return -ENOMEM; + } + + ret = read_fp(img_fp, buf, img_size); + if (ret) { + pr_perror("Unable to read from %s", img_path); + fclose(img_fp); + xfree(buf); + return ret; + } + + fclose(img_fp); + rd = criu_render_node__unpack(NULL, img_size, buf); + num_handles += rd->num_of_bos; + criu_render_node__free_unpacked(rd, NULL); + xfree(buf); + } + } + closedir(d); + } + + if (num_handles > 0) { + shared_memory = mmap(NULL, sizeof(shared_memory), protection, visibility, -1, 0); + shared_memory->num_handles = num_handles; + shared_memory->handles = mmap(NULL, sizeof(struct handle_id) * num_handles, protection, visibility, -1, 0); + + for (int i = 0; i < num_handles; i++) { + shared_memory->handles[i].handle = -1; + shared_memory->handles[i].fdstore_id = -1; + } + + shared_memory_mutex = shmalloc(sizeof(*shared_memory_mutex)); + if (!shared_memory_mutex) { + pr_err("Can't create amdgpu mutex\n"); + return -1; + } + mutex_init(shared_memory_mutex); + } + } + + return 0; +} +CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESTORE_INIT, amdgpu_restore_init) static int save_devices(int fd, struct kfd_ioctl_criu_args *args, struct kfd_criu_device_bucket *device_buckets, CriuKfd *e) @@ -1072,6 +1243,8 @@ static int save_bos(int id, int fd, struct kfd_ioctl_criu_args *args, struct kfd { struct thread_data *thread_datas; int ret = 0, i; + amdgpu_device_handle h_dev; + uint32_t major, minor; 
pr_debug("Dumping %d BOs\n", args->num_bos); @@ -1095,6 +1268,19 @@ static int save_bos(int id, int fd, struct kfd_ioctl_criu_args *args, struct kfd boinfo->size = bo_bucket->size; boinfo->offset = bo_bucket->offset; boinfo->alloc_flags = bo_bucket->alloc_flags; + + ret = amdgpu_device_initialize(node_get_drm_render_device(sys_get_node_by_gpu_id(&src_topology, bo_bucket->gpu_id)), &major, &minor, &h_dev); + + boinfo->handle = get_gem_handle(h_dev, bo_bucket->dmabuf_fd); + + amdgpu_device_deinitialize(h_dev); + } + for (i = 0; i < e->num_of_bos; i++) { + KfdBoEntry *boinfo = e->bo_entries[i]; + + ret = record_shared_bo(boinfo->handle, false); + if (ret) + goto exit; } for (int i = 0; i < e->num_of_gpus; i++) { @@ -1215,10 +1401,17 @@ int amdgpu_plugin_dump_file(int fd, int id) return -1; } - /* Initialize number of device files that will be checkpointed */ - init_gpu_count(&src_topology); + /* Check whether this plugin was called for kfd, dmabuf or render nodes */ + ret = get_dmabuf_info(fd, &st); + if (ret < 0) { + pr_perror("Failed to get dmabuf info"); + return -1; + } + if (ret == 0) { + pr_info("Dumping dmabuf fd = %d\n", fd); + return amdgpu_plugin_dmabuf_dump(fd, id); + } - /* Check whether this plugin was called for kfd or render nodes */ if (major(st.st_rdev) != major(st_kfd.st_rdev) || minor(st.st_rdev) != 0) { /* This is RenderD dumper plugin, for now just save renderD @@ -1229,14 +1422,12 @@ int amdgpu_plugin_dump_file(int fd, int id) if (ret) return ret; - /* Invoke unpause process if needed */ - decrement_checkpoint_count(); - if (checkpoint_is_complete()) { - ret = unpause_process(kfd_checkpoint_fd); - } + ret = record_dumped_fd(fd, true); + if (ret) + return ret; /* Need to return success here so that criu can call plugins for renderD nodes */ - return ret; + return try_dump_dmabuf_list(); } pr_info("%s() called for fd = %d\n", __func__, major(st.st_rdev)); @@ -1331,14 +1522,11 @@ int amdgpu_plugin_dump_file(int fd, int id) xfree(buf); -exit: - /* Restore all queues if conditions permit */ - kfd_checkpoint_fd = fd; - decrement_checkpoint_count(); - if (checkpoint_is_complete()) { - ret = unpause_process(fd); - } + ret = record_dumped_fd(fd, false); + if (ret) + goto exit; +exit: xfree((void *)args.devices); xfree((void *)args.bos); xfree((void *)args.priv_data); @@ -1361,7 +1549,6 @@ static int restore_devices(struct kfd_ioctl_criu_args *args, CriuKfd *e) int ret = 0, bucket_index = 0; pr_debug("Restoring %d devices\n", e->num_of_gpus); - args->num_devices = e->num_of_gpus; device_buckets = xzalloc(sizeof(*device_buckets) * args->num_devices); if (!device_buckets) @@ -1434,19 +1621,37 @@ static int restore_bos(struct kfd_ioctl_criu_args *args, CriuKfd *e) } pr_info("Restore BOs Ok\n"); + + return 0; +} + +int save_vma_updates(uint64_t offset, uint64_t addr, uint64_t restored_offset, int fd) +{ + struct vma_metadata *vma_md; + + vma_md = xmalloc(sizeof(*vma_md)); + if (!vma_md) { + return -ENOMEM; + } + + memset(vma_md, 0, sizeof(*vma_md)); + + vma_md->old_pgoff = offset; + vma_md->vma_entry = addr; + + vma_md->new_pgoff = restored_offset; + vma_md->fd = fd; + + list_add_tail(&vma_md->list, &update_vma_info_list); + return 0; } static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKfd *e) { - struct thread_data *thread_datas; + struct thread_data *thread_datas = NULL; int thread_i, ret = 0; - - thread_datas = xzalloc(sizeof(*thread_datas) * e->num_of_gpus); - if (!thread_datas) { - ret = -ENOMEM; - goto exit; - } + uint64_t offset = 0; for (int i = 0; 
i < e->num_of_bos; i++) { struct kfd_criu_bo_bucket *bo_bucket = &bo_buckets[i]; @@ -1489,56 +1694,101 @@ static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKf } } - thread_i = 0; - for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) { - struct tp_node *dev; - int ret_thread = 0; - uint32_t target_gpu_id; + if (!parallel_disabled) { + parallel_restore_cmd restore_cmd; + pr_info("Begin to send parallel restore cmd\n"); + ret = init_parallel_restore_cmd(e->num_of_bos, id, e->num_of_gpus, &restore_cmd); + if (ret) + goto exit_parallel; - if (!e->device_entries[i]->gpu_id) - continue; + for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) { + uint32_t target_gpu_id; + struct tp_node *dev; - /* e->device_entries[i]->gpu_id is user_gpu_id, target_gpu_id is actual_gpu_id */ - target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id); + if (!e->device_entries[i]->gpu_id) + continue; - /* We need the fd for actual_gpu_id */ - dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id); - if (!dev) { - pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id); - ret = -ENODEV; + target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id); + dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id); + if (!dev) { + pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id); + ret = -ENODEV; + goto exit_parallel; + } + parallel_restore_gpu_id_add(e->device_entries[i]->gpu_id, dev->drm_render_minor, &restore_cmd); + + for (int j = 0; j < e->num_of_bos; j++) { + if (bo_buckets[j].gpu_id != e->device_entries[i]->gpu_id) + continue; + if (bo_buckets[j].alloc_flags & + (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) { + parallel_restore_bo_add(bo_buckets[j].dmabuf_fd, bo_buckets[j].gpu_id, + bo_buckets[j].size, offset, &restore_cmd); + offset += bo_buckets[j].size; + } + } + } + ret = send_parallel_restore_cmd(&restore_cmd); +exit_parallel: + free_parallel_restore_cmd(&restore_cmd); + } else { + thread_datas = xzalloc(sizeof(*thread_datas) * e->num_of_gpus); + if (!thread_datas) { + ret = -ENOMEM; goto exit; } - thread_datas[thread_i].id = id; - thread_datas[thread_i].gpu_id = e->device_entries[i]->gpu_id; - thread_datas[thread_i].bo_buckets = bo_buckets; - thread_datas[thread_i].bo_entries = e->bo_entries; - thread_datas[thread_i].pid = e->pid; - thread_datas[thread_i].num_of_bos = e->num_of_bos; + thread_i = 0; + for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) { + struct tp_node *dev; + int ret_thread = 0; + uint32_t target_gpu_id; - thread_datas[thread_i].drm_fd = node_get_drm_render_device(dev); - if (thread_datas[thread_i].drm_fd < 0) { - ret = -thread_datas[thread_i].drm_fd; - goto exit; + if (!e->device_entries[i]->gpu_id) + continue; + + /* e->device_entries[i]->gpu_id is user_gpu_id, target_gpu_id is actual_gpu_id */ + target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id); + + /* We need the fd for actual_gpu_id */ + dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id); + if (!dev) { + pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id); + ret = -ENODEV; + goto exit; + } + + thread_datas[thread_i].id = id; + thread_datas[thread_i].gpu_id = e->device_entries[i]->gpu_id; + thread_datas[thread_i].bo_buckets = bo_buckets; + thread_datas[thread_i].bo_entries = e->bo_entries; + thread_datas[thread_i].pid = e->pid; + thread_datas[thread_i].num_of_bos = e->num_of_bos; + + thread_datas[thread_i].drm_fd = node_get_drm_render_device(dev); + if 
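/*
 * The parallel branch above packs every VRAM/GTT BO into one command and
 * gives each a cumulative read_offset into the per-GPU image file, in dump
 * order; the worker later adds the file's current position on top. A
 * reduced sketch of that layout step (struct bo_desc is illustrative):
 */
#include <stdint.h>

struct bo_desc {
	int gpu_id;
	uint64_t size;
	uint64_t read_offset; /* where this BO's bytes start in the image */
};

static uint64_t assign_read_offsets(struct bo_desc *bos, int n)
{
	uint64_t off = 0;

	for (int i = 0; i < n; i++) {
		bos[i].read_offset = off;
		off += bos[i].size;
	}
	return off; /* total payload size, useful as a sanity check */
}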
(thread_datas[thread_i].drm_fd < 0) { + ret = -thread_datas[thread_i].drm_fd; + goto exit; + } + + ret_thread = pthread_create(&thread_datas[thread_i].thread, NULL, restore_bo_contents, + (void *)&thread_datas[thread_i]); + if (ret_thread) { + pr_err("Failed to create thread[%i] ret:%d\n", thread_i, ret_thread); + ret = -ret_thread; + goto exit; + } + thread_i++; } - ret_thread = pthread_create(&thread_datas[thread_i].thread, NULL, restore_bo_contents, - (void *)&thread_datas[thread_i]); - if (ret_thread) { - pr_err("Failed to create thread[%i] ret:%d\n", thread_i, ret_thread); - ret = -ret_thread; - goto exit; - } - thread_i++; - } + for (int i = 0; i < e->num_of_gpus; i++) { + pthread_join(thread_datas[i].thread, NULL); + pr_info("Thread[0x%x] finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret); - for (int i = 0; i < e->num_of_gpus; i++) { - pthread_join(thread_datas[i].thread, NULL); - pr_info("Thread[0x%x] finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret); - - if (thread_datas[i].ret) { - ret = thread_datas[i].ret; - goto exit; + if (thread_datas[i].ret) { + ret = thread_datas[i].ret; + goto exit; + } } } exit: @@ -1546,12 +1796,12 @@ exit: if (bo_buckets[i].dmabuf_fd != KFD_INVALID_FD) close(bo_buckets[i].dmabuf_fd); } - - xfree(thread_datas); + if (thread_datas) + xfree(thread_datas); return ret; } -int amdgpu_plugin_restore_file(int id) +int amdgpu_plugin_restore_file(int id, bool *retry_needed) { int ret = 0, fd; char img_path[PATH_MAX]; @@ -1562,6 +1812,8 @@ int amdgpu_plugin_restore_file(int id) size_t img_size; FILE *img_fp = NULL; + *retry_needed = false; + if (plugin_disabled) return -ENOTSUP; @@ -1580,12 +1832,21 @@ int amdgpu_plugin_restore_file(int id) * first as we assume restore_maps is already filled. Need to fix this later. */ snprintf(img_path, sizeof(img_path), IMG_DRM_FILE, id); - pr_info("Restoring RenderD %s\n", img_path); img_fp = open_img_file(img_path, false, &img_size); - if (!img_fp) - return -EINVAL; - + if (!img_fp) { + ret = amdgpu_plugin_dmabuf_restore(id); + if (ret == 1) { + /* This is a dmabuf fd, but the corresponding buffer object that was + * exported to make it has not yet been restored. Need to try again + * later when the buffer object exists, so it can be re-exported. + */ + *retry_needed = true; + return 0; + } + return ret; + } + pr_info("Restoring RenderD %s\n", img_path); pr_debug("RenderD Image file size:%ld\n", img_size); buf = xmalloc(img_size); if (!buf) { @@ -1626,8 +1887,18 @@ int amdgpu_plugin_restore_file(int id) pr_info("render node destination gpu_id = 0x%04x\n", tp_node->gpu_id); fd = node_get_drm_render_device(tp_node); - if (fd < 0) + if (fd < 0) { pr_err("Failed to open render device (minor:%d)\n", tp_node->drm_render_minor); + return -1; + } + + ret = amdgpu_plugin_drm_restore_file(fd, rd); + if (ret == 1) + *retry_needed = true; + if (ret < 0) { + fd = ret; + goto fail; + } fail: criu_render_node__free_unpacked(rd, NULL); xfree(buf); @@ -1639,12 +1910,20 @@ int amdgpu_plugin_restore_file(int id) * copy of the fd. CRIU core owns the duplicated returned fd, and amdgpu_plugin owns the fd stored in * tp_node. 
*/ - fd = dup(fd); - if (fd == -1) { - pr_perror("unable to duplicate the render fd"); - return -1; + + if (fd < 0) + return fd; + + if (!(*retry_needed)) { + fd = dup(fd); + if (fd == -1) { + pr_perror("unable to duplicate the render fd"); + return -1; + } + return fd; } - return fd; + + return 0; } fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC); @@ -1688,11 +1967,13 @@ int amdgpu_plugin_restore_file(int id) * This way, we know that the file descriptors we store will not conflict with file descriptors inside core * CRIU. */ - fd_next = find_unused_fd_pid(e->pid); - if (fd_next <= 0) { - pr_err("Failed to find unused fd (fd:%d)\n", fd_next); - ret = -EINVAL; - goto exit; + if (fd_next == -1) { + fd_next = find_unused_fd_pid(e->pid); + if (fd_next <= 0) { + pr_err("Failed to find unused fd (fd:%d)\n", fd_next); + ret = -EINVAL; + goto exit; + } } ret = devinfo_to_topology(e->device_entries, e->num_of_gpus + e->num_of_cpus, &src_topology); @@ -1725,14 +2006,26 @@ int amdgpu_plugin_restore_file(int id) args.num_objects = e->num_of_objects; args.priv_data_size = e->priv_data.len; args.priv_data = (uintptr_t)e->priv_data.data; - args.op = KFD_CRIU_OP_RESTORE; + if (kmtIoctl(fd, AMDKFD_IOC_CRIU_OP, &args) == -1) { pr_perror("Restore ioctl failed"); ret = -1; goto exit; } + if (ret < 0) + goto exit; + + for (int i = 0; i < args.num_bos; i++) { + struct kfd_criu_bo_bucket *bo_bucket = &((struct kfd_criu_bo_bucket *)args.bos)[i]; + KfdBoEntry *bo_entry = e->bo_entries[i]; + + if (bo_entry->handle != -1) { + store_dmabuf_fd(bo_entry->handle, bo_bucket->dmabuf_fd); + } + } + ret = restore_bo_data(id, (struct kfd_criu_bo_bucket *)args.bos, e); if (ret) goto exit; @@ -1836,6 +2129,24 @@ int amdgpu_plugin_resume_devices_late(int target_pid) if (plugin_disabled) return -ENOTSUP; + if (!parallel_disabled) { + pr_info("Close parallel restore server\n"); + if (close_parallel_restore_server()) { + pr_err("Close parallel restore server fail\n"); + return -1; + } + + exit_code = pthread_join(parallel_thread, NULL); + if (exit_code) { + pr_err("Failed to join parallel thread ret:%d\n", exit_code); + return -1; + } + if (parallel_thread_result) { + pr_err("Parallel restore fail\n"); + return parallel_thread_result; + } + } + pr_info("Inside %s for target pid = %d\n", __func__, target_pid); fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC); @@ -1857,8 +2168,246 @@ int amdgpu_plugin_resume_devices_late(int target_pid) } } + clear_restore_state(); + close(fd); return exit_code; } CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, amdgpu_plugin_resume_devices_late) + +int init_dev(int dev_minor, amdgpu_device_handle *h_dev, uint64_t *max_copy_size) +{ + int ret = 0; + int drm_fd = -1; + uint32_t major, minor; + + struct amdgpu_gpu_info gpu_info = { 0 }; + + drm_fd = open_drm_render_device(dev_minor); + if (drm_fd < 0) { + return drm_fd; + } + + ret = amdgpu_device_initialize(drm_fd, &major, &minor, h_dev); + if (ret) { + pr_perror("Failed to initialize device"); + goto err; + } + + ret = amdgpu_query_gpu_info(*h_dev, &gpu_info); + if (ret) { + pr_perror("failed to query gpuinfo via libdrm"); + goto err; + } + *max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ? 
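/*
 * A compilable sketch of the device probe init_dev() performs here: open
 * the render node, query the GPU family via libdrm_amdgpu, and derive the
 * largest linear copy the SDMA engine accepts. SDMA_LINEAR_COPY_MAX_SIZE
 * comes from the plugin's own headers; the helper name is illustrative.
 */
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "amdgpu_plugin_util.h" /* SDMA_LINEAR_COPY_MAX_SIZE */

static int probe_max_copy(int drm_fd, uint64_t *max_copy_size)
{
	struct amdgpu_gpu_info info = { 0 };
	amdgpu_device_handle dev;
	uint32_t major, minor;
	int ret;

	ret = amdgpu_device_initialize(drm_fd, &major, &minor, &dev);
	if (ret)
		return ret;

	ret = amdgpu_query_gpu_info(dev, &info);
	if (!ret)
		/* pre-AI families take one byte less per SDMA packet */
		*max_copy_size = (info.family_id >= AMDGPU_FAMILY_AI) ?
					 SDMA_LINEAR_COPY_MAX_SIZE :
					 SDMA_LINEAR_COPY_MAX_SIZE - 1;

	amdgpu_device_deinitialize(dev);
	return ret;
}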
SDMA_LINEAR_COPY_MAX_SIZE : + SDMA_LINEAR_COPY_MAX_SIZE - 1; + return 0; +err: + amdgpu_device_deinitialize(*h_dev); + return ret; +} + +FILE *get_bo_contents_fp(int id, int gpu_id, size_t tot_size) +{ + char img_path[PATH_MAX]; + size_t image_size = 0; + FILE *bo_contents_fp = NULL; + + snprintf(img_path, sizeof(img_path), IMG_KFD_PAGES_FILE, id, gpu_id); + bo_contents_fp = open_img_file(img_path, false, &image_size); + if (!bo_contents_fp) { + pr_perror("Cannot fopen %s", img_path); + return NULL; + } + + if (tot_size != image_size) { + pr_err("%s size mismatch (current:%ld:expected:%ld)\n", img_path, image_size, tot_size); + fclose(bo_contents_fp); + return NULL; + } + return bo_contents_fp; +} + +struct parallel_thread_data { + pthread_t thread; + uint32_t gpu_id; + int minor; + parallel_restore_cmd *restore_cmd; + int ret; +}; + +void *parallel_restore_bo_contents(void *_thread_data) +{ + struct parallel_thread_data *thread_data = (struct parallel_thread_data *)_thread_data; + amdgpu_device_handle h_dev; + uint64_t max_copy_size; + size_t total_bo_size = 0, max_bo_size = 0, buffer_size = 0; + FILE *bo_contents_fp = NULL; + parallel_restore_entry *entry; + parallel_restore_cmd *restore_cmd = thread_data->restore_cmd; + int ret = 0; + int offset = 0; + void *buffer = NULL; + + ret = init_dev(thread_data->minor, &h_dev, &max_copy_size); + if (ret) { + goto err; + } + + for (int i = 0; i < restore_cmd->cmd_head.entry_num; i++) { + if (restore_cmd->entries[i].gpu_id == thread_data->gpu_id) { + total_bo_size += restore_cmd->entries[i].size; + max_bo_size = max(restore_cmd->entries[i].size, max_bo_size); + } + } + + buffer_size = kfd_max_buffer_size > 0 ? min(kfd_max_buffer_size, max_bo_size) : max_bo_size; + + bo_contents_fp = get_bo_contents_fp(restore_cmd->cmd_head.id, thread_data->gpu_id, total_bo_size); + if (bo_contents_fp == NULL) { + ret = -1; + goto err_sdma; + } + offset = ftell(bo_contents_fp); + + posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), buffer_size); + if (!buffer) { + pr_perror("Failed to alloc aligned memory. 
Consider setting KFD_MAX_BUFFER_SIZE."); + ret = -ENOMEM; + goto err_sdma; + } + + for (int i = 0; i < restore_cmd->cmd_head.entry_num; i++) { + if (restore_cmd->entries[i].gpu_id != thread_data->gpu_id) + continue; + + entry = &restore_cmd->entries[i]; + fseeko(bo_contents_fp, entry->read_offset + offset, SEEK_SET); + ret = sdma_copy_bo(restore_cmd->fds_write[entry->write_id], entry->size, bo_contents_fp, + buffer, buffer_size, h_dev, + max_copy_size, SDMA_OP_VRAM_WRITE, false); + + if (ret) { + pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i); + goto err_sdma; + } + } + +err_sdma: + if (bo_contents_fp) + fclose(bo_contents_fp); + if (buffer) + xfree(buffer); + amdgpu_device_deinitialize(h_dev); +err: + thread_data->ret = ret; + return NULL; +} + +void *restore_device_parallel_worker(void *arg) +{ + while (1) { + parallel_restore_cmd restore_cmd = { 0 }; + struct parallel_thread_data *thread_datas = NULL; + int ret; + int error_occurred = 0, join_ret = 0, created_threads = 0; + + ret = recv_parallel_restore_cmd(&restore_cmd); + if (ret) { + if (ret == 1) { + *(int *)arg = 0; + goto exit; + } + goto err; + } + + thread_datas = xzalloc(sizeof(*thread_datas) * restore_cmd.cmd_head.gpu_num); + if (!thread_datas) { + ret = -ENOMEM; + goto err; + } + + for (; created_threads < restore_cmd.cmd_head.gpu_num; created_threads++) { + thread_datas[created_threads].gpu_id = restore_cmd.gpu_ids[created_threads].gpu_id; + thread_datas[created_threads].minor = restore_cmd.gpu_ids[created_threads].minor; + thread_datas[created_threads].restore_cmd = &restore_cmd; + + ret = pthread_create(&thread_datas[created_threads].thread, NULL, parallel_restore_bo_contents, + (void *)&thread_datas[created_threads]); + if (ret) { + pr_err("Failed to create thread[0x%x] ret:%d\n", thread_datas[created_threads].gpu_id, ret); + error_occurred = 1; + break; + } + } + + for (int i = 0; i < created_threads; i++) { + join_ret = pthread_join(thread_datas[i].thread, NULL); + if (join_ret != 0) { + pr_err("pthread_join failed for Thread[0x%x] ret:%d\n", + thread_datas[i].gpu_id, join_ret); + if (!error_occurred) { + ret = join_ret; + error_occurred = 1; + } + } + + pr_info("Thread[0x%x] finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret); + + /* Check thread return value */ + if (thread_datas[i].ret && !error_occurred) { + ret = thread_datas[i].ret; + error_occurred = 1; + } + } + + if (thread_datas) + xfree(thread_datas); +err: + free_parallel_restore_cmd(&restore_cmd); + + if (ret) { + *(int *)arg = ret; + return NULL; + } + } +exit: + return NULL; +} + +/* + * While the background thread is running, some processing functions (e.g., stop_cgroupd) + * in the main thread need to block SIGCHLD. To prevent interference from this background + * thread, SIGCHLD is blocked in this thread. 
+ */ +static int back_thread_create(pthread_t *newthread, void *(*f)(void *), void *arg) +{ + int ret = 0; + sigset_t blockmask, oldmask; + + sigemptyset(&blockmask); + sigaddset(&blockmask, SIGCHLD); + sigprocmask(SIG_BLOCK, &blockmask, &oldmask); + + ret = pthread_create(newthread, NULL, f, arg); + if (ret) { + pr_err("Create worker thread fail: %d\n", ret); + return -1; + } + + sigprocmask(SIG_SETMASK, &oldmask, NULL); + return 0; +} + +int amdgpu_plugin_post_forking(void) +{ + if (plugin_disabled) + return -ENOTSUP; + + if (parallel_disabled) + return 0; + + return back_thread_create(¶llel_thread, restore_device_parallel_worker, ¶llel_thread_result); +} +CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__POST_FORKING, amdgpu_plugin_post_forking) diff --git a/plugins/amdgpu/amdgpu_plugin_dmabuf.c b/plugins/amdgpu/amdgpu_plugin_dmabuf.c new file mode 100644 index 000000000..11c9792e3 --- /dev/null +++ b/plugins/amdgpu/amdgpu_plugin_dmabuf.c @@ -0,0 +1,197 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "common/list.h" +#include "criu-amdgpu.pb-c.h" + +#include "xmalloc.h" +#include "criu-log.h" +#include "amdgpu_plugin_drm.h" +#include "amdgpu_plugin_util.h" +#include "amdgpu_plugin_dmabuf.h" +#include "fdstore.h" + +#include "util.h" +#include "common/scm.h" + +struct dmabuf { + int id; + int dmabuf_fd; + struct list_head node; +}; + +static LIST_HEAD(dmabuf_list); + +/* Return < 0 for error, > 0 for "not a dmabuf" and 0 "is a dmabuf" */ +int get_dmabuf_info(int fd, struct stat *st) +{ + char path[PATH_MAX]; + + if (read_fd_link(fd, path, sizeof(path)) < 0) + return -1; + + if (strncmp(path, DMABUF_LINK, strlen(DMABUF_LINK)) != 0) + return 1; + + return 0; +} + +int __amdgpu_plugin_dmabuf_dump(int dmabuf_fd, int id) +{ + int ret = 0; + char path[PATH_MAX]; + size_t len = 0; + unsigned char *buf = NULL; + int gem_handle; + + gem_handle = handle_for_shared_bo_fd(dmabuf_fd); + if (gem_handle < 0) { + pr_err("Failed to get handle for dmabuf_fd = %d\n", dmabuf_fd); + return -EAGAIN; /* Retry needed */ + } + + CriuDmabufNode *node = xmalloc(sizeof(*node)); + if (!node) { + pr_err("Failed to allocate memory for dmabuf node\n"); + return -ENOMEM; + } + criu_dmabuf_node__init(node); + + node->gem_handle = gem_handle; + + if (node->gem_handle < 0) { + pr_err("Failed to get handle for dmabuf_fd\n"); + xfree(node); + return -EINVAL; + } + + /* Serialize metadata to a file */ + snprintf(path, sizeof(path), IMG_DMABUF_FILE, id); + len = criu_dmabuf_node__get_packed_size(node); + buf = xmalloc(len); + if (!buf) { + pr_err("Failed to allocate buffer for dmabuf metadata\n"); + xfree(node); + return -ENOMEM; + } + criu_dmabuf_node__pack(node, buf); + ret = write_img_file(path, buf, len); + + xfree(buf); + xfree(node); + return ret; +} + +int amdgpu_plugin_dmabuf_restore(int id) +{ + char path[PATH_MAX]; + size_t img_size; + FILE *img_fp = NULL; + int ret = 0; + CriuDmabufNode *rd = NULL; + unsigned char *buf = NULL; + int fd_id; + + snprintf(path, sizeof(path), IMG_DMABUF_FILE, id); + + /* Read serialized metadata */ + img_fp = open_img_file(path, false, &img_size); + if (!img_fp) { + pr_err("Failed to open dmabuf metadata file: %s\n", path); + return -EINVAL; + } + + pr_debug("dmabuf Image file size:%ld\n", img_size); + buf = xmalloc(img_size); + if (!buf) { + pr_perror("Failed to allocate memory"); + return -ENOMEM; + } + + ret = read_fp(img_fp, buf, img_size); + if (ret) { + pr_perror("Unable to read from %s", path); + 
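/*
 * A new thread inherits its creator's signal mask, so blocking SIGCHLD
 * around pthread_create() (as back_thread_create() does above) leaves the
 * worker running with SIGCHLD blocked while the caller's mask is restored.
 * The same pattern with pthread_sigmask(), which is the form POSIX
 * specifies for multi-threaded programs:
 */
#include <pthread.h>
#include <signal.h>

static int spawn_with_sigchld_blocked(pthread_t *tid, void *(*fn)(void *), void *arg)
{
	sigset_t block, old;
	int ret;

	sigemptyset(&block);
	sigaddset(&block, SIGCHLD);
	pthread_sigmask(SIG_BLOCK, &block, &old);

	ret = pthread_create(tid, NULL, fn, arg); /* worker inherits 'block' */

	pthread_sigmask(SIG_SETMASK, &old, NULL); /* caller's mask restored */
	return ret;
}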
xfree(buf);
+		fclose(img_fp);
+		return ret;
+	}
+
+	rd = criu_dmabuf_node__unpack(NULL, img_size, buf);
+	if (rd == NULL) {
+		pr_err("Unable to parse the dmabuf message %d\n", id);
+		xfree(buf);
+		fclose(img_fp);
+		return -1;
+	}
+	fclose(img_fp);
+
+	/* Match GEM handle with shared_dmabuf list */
+	fd_id = amdgpu_id_for_handle(rd->gem_handle);
+	if (fd_id == -1) {
+		pr_err("Failed to find dmabuf_fd for GEM handle = %d\n", rd->gem_handle);
+		criu_dmabuf_node__free_unpacked(rd, NULL);
+		xfree(buf);
+		return 1; /* Retry needed */
+	}
+
+	int dmabuf_fd = fdstore_get(fd_id);
+	if (dmabuf_fd == -1) {
+		pr_err("Failed to get dmabuf_fd from fdstore for GEM handle = %d\n", rd->gem_handle);
+		criu_dmabuf_node__free_unpacked(rd, NULL);
+		xfree(buf);
+		return 1; /* Retry needed */
+	}
+
+	pr_info("Successfully restored dmabuf_fd = %d for GEM handle = %d\n", dmabuf_fd, rd->gem_handle);
+	ret = dmabuf_fd;
+
+	criu_dmabuf_node__free_unpacked(rd, NULL);
+	xfree(buf);
+	return ret;
+}
+
+int amdgpu_plugin_dmabuf_dump(int dmabuf_fd, int id)
+{
+	int ret;
+
+	ret = __amdgpu_plugin_dmabuf_dump(dmabuf_fd, id);
+	if (ret == -EAGAIN) {
+		struct dmabuf *b = xmalloc(sizeof(*b));
+
+		if (!b)
+			return -ENOMEM;
+		b->id = id;
+		b->dmabuf_fd = dmabuf_fd;
+		list_add(&b->node, &dmabuf_list);
+		return 0;
+	}
+	return ret;
+}
+
+int try_dump_dmabuf_list(void)
+{
+	struct dmabuf *b, *t;
+
+	list_for_each_entry_safe(b, t, &dmabuf_list, node) {
+		int ret = __amdgpu_plugin_dmabuf_dump(b->dmabuf_fd, b->id);
+
+		if (ret == -EAGAIN)
+			continue;
+		if (ret)
+			return ret;
+		list_del(&b->node);
+		xfree(b);
+	}
+	return 0;
+}
+
+int post_dump_dmabuf_check(void)
+{
+	if (!list_empty(&dmabuf_list)) {
+		pr_err("Not all dma buffers have been dumped\n");
+		return -1;
+	}
+	return 0;
+}
diff --git a/plugins/amdgpu/amdgpu_plugin_dmabuf.h b/plugins/amdgpu/amdgpu_plugin_dmabuf.h
new file mode 100644
index 000000000..f07af7ee0
--- /dev/null
+++ b/plugins/amdgpu/amdgpu_plugin_dmabuf.h
@@ -0,0 +1,16 @@
+
+#ifndef __AMDGPU_PLUGIN_DMABUF_H__
+#define __AMDGPU_PLUGIN_DMABUF_H__
+
+#include "amdgpu_plugin_util.h"
+#include "criu-amdgpu.pb-c.h"
+
+int amdgpu_plugin_dmabuf_dump(int fd, int id);
+int amdgpu_plugin_dmabuf_restore(int id);
+
+int try_dump_dmabuf_list(void);
+int post_dump_dmabuf_check(void);
+
+int get_dmabuf_info(int fd, struct stat *st);
+
+#endif /* __AMDGPU_PLUGIN_DMABUF_H__ */
\ No newline at end of file
diff --git a/plugins/amdgpu/amdgpu_plugin_drm.c b/plugins/amdgpu/amdgpu_plugin_drm.c
index d54cd937d..3520bca7a 100644
--- a/plugins/amdgpu/amdgpu_plugin_drm.c
+++ b/plugins/amdgpu/amdgpu_plugin_drm.c
@@ -19,19 +19,115 @@
 #include 
 
 #include "common/list.h"
+#include "files.h"
+#include "fdstore.h"
 #include "criu-amdgpu.pb-c.h"
 
+/* Define __user as empty for kernel headers in user-space */
+#define __user
+#include "drm.h"
+
 #include 
 #include 
 
 #include "xmalloc.h"
-#include "criu-log.h"
-#include "kfd_ioctl.h"
+#include "amdgpu_drm.h"
 #include "amdgpu_plugin_drm.h"
 #include "amdgpu_plugin_util.h"
 #include "amdgpu_plugin_topology.h"
 
+#include "util.h"
+#include "common/scm.h"
+
+int get_gem_handle(amdgpu_device_handle h_dev, int dmabuf_fd)
+{
+	uint32_t handle;
+	int fd = amdgpu_device_get_fd(h_dev);
+
+	if (dmabuf_fd == -1) {
+		return -1;
+	}
+
+	if (drmPrimeFDToHandle(fd, dmabuf_fd, &handle))
+		return -1;
+
+	return handle;
+}
+
+int drmIoctl(int fd, unsigned long request, void *arg)
+{
+	int ret, max_retries = 200;
+
+	do {
+		ret = ioctl(fd, request, arg);
+	} while (ret == -1 && max_retries-- > 0 && (errno == EINTR || errno == EAGAIN));
+
+	if (ret == -1 && errno == EBADF)
+		/* In case pthread_atfork didn't catch it, this will
+		 * make any subsequent hsaKmt calls fail in CHECK_KFD_OPEN.
+		 */
+		pr_perror("KFD file descriptor not valid in this process");
+	return ret;
+}
+
+static int allocate_bo_entries(CriuRenderNode *e, int num_bos)
+{
+	e->bo_entries = xmalloc(sizeof(DrmBoEntry *) * num_bos);
+	if (!e->bo_entries) {
+		pr_err("Failed to allocate bo_info\n");
+		return -ENOMEM;
+	}
+
+	for (int i = 0; i < num_bos; i++) {
+		DrmBoEntry *entry = xzalloc(sizeof(*entry));
+
+		if (!entry) {
+			pr_err("Failed to allocate bo entry\n");
+			return -ENOMEM;
+		}
+
+		drm_bo_entry__init(entry);
+
+		e->bo_entries[i] = entry;
+		e->n_bo_entries++;
+	}
+	return 0;
+}
+
+static int allocate_vm_entries(DrmBoEntry *e, int num_vms)
+{
+	e->vm_entries = xmalloc(sizeof(DrmVmEntry *) * num_vms);
+	if (!e->vm_entries) {
+		pr_err("Failed to allocate vm_info\n");
+		return -ENOMEM;
+	}
+
+	for (int i = 0; i < num_vms; i++) {
+		DrmVmEntry *entry = xzalloc(sizeof(*entry));
+
+		if (!entry) {
+			pr_err("Failed to allocate vm entry\n");
+			return -ENOMEM;
+		}
+
+		drm_vm_entry__init(entry);
+
+		e->vm_entries[i] = entry;
+		e->n_vm_entries++;
+	}
+	return 0;
+}
+
+static void free_e(CriuRenderNode *e)
+{
+	if (!e)
+		return;
+
+	for (int i = 0; i < e->n_bo_entries; i++) {
+		if (!e->bo_entries[i])
+			continue;
+		for (int j = 0; j < e->bo_entries[i]->n_vm_entries; j++)
+			xfree(e->bo_entries[i]->vm_entries[j]);
+		xfree(e->bo_entries[i]->vm_entries);
+		xfree(e->bo_entries[i]);
+	}
+	xfree(e->bo_entries);
+
+	xfree(e);
+}
 
 int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *st)
 {
@@ -60,19 +156,257 @@ int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *st)
 	return 0;
 }
 
+static int restore_bo_contents_drm(int drm_render_minor, CriuRenderNode *rd, int drm_fd, int *dmabufs)
+{
+	size_t image_size = 0, max_bo_size = 0, buffer_size;
+	struct amdgpu_gpu_info gpu_info = { 0 };
+	amdgpu_device_handle h_dev;
+	uint64_t max_copy_size;
+	uint32_t major, minor;
+	FILE *bo_contents_fp = NULL;
+	void *buffer = NULL;
+	char img_path[40];
+	int i, ret = 0;
+
+	ret = amdgpu_device_initialize(drm_fd, &major, &minor, &h_dev);
+	if (ret) {
+		pr_perror("failed to initialize device");
+		goto exit;
+	}
+	plugin_log_msg("libdrm initialized successfully\n");
+
+	ret = amdgpu_query_gpu_info(h_dev, &gpu_info);
+	if (ret) {
+		pr_perror("failed to query gpuinfo via libdrm");
+		goto exit;
+	}
+
+	max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ? SDMA_LINEAR_COPY_MAX_SIZE :
+								   SDMA_LINEAR_COPY_MAX_SIZE - 1;
+
+	for (i = 0; i < rd->num_of_bos; i++) {
+		if (rd->bo_entries[i]->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
+			if (rd->bo_entries[i]->size > max_bo_size)
+				max_bo_size = rd->bo_entries[i]->size;
+		}
+	}
+
+	buffer_size = max_bo_size;
+
+	posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), buffer_size);
+	if (!buffer) {
+		pr_perror("Failed to alloc aligned memory. 
Consider setting KFD_MAX_BUFFER_SIZE."); + ret = -ENOMEM; + goto exit; + } + + for (i = 0; i < rd->num_of_bos; i++) { + if (!(rd->bo_entries[i]->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT))) + continue; + + if (rd->bo_entries[i]->num_of_vms == 0) + continue; + + snprintf(img_path, sizeof(img_path), IMG_DRM_PAGES_FILE, rd->id, drm_render_minor, i); + + bo_contents_fp = open_img_file(img_path, false, &image_size); + + ret = sdma_copy_bo(dmabufs[i], rd->bo_entries[i]->size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, + SDMA_OP_VRAM_WRITE, true); + if (ret) { + pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i); + break; + } + plugin_log_msg("** Successfully filled the BO using sDMA: bo_buckets[%d] **\n", i); + + if (bo_contents_fp) + fclose(bo_contents_fp); + } + +exit: + for (int i = 0; i < rd->num_of_bos; i++) { + if (dmabufs[i] != KFD_INVALID_FD) + close(dmabufs[i]); + } + + xfree(buffer); + + amdgpu_device_deinitialize(h_dev); + return ret; +} int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm) { - CriuRenderNode rd = CRIU_RENDER_NODE__INIT; - struct tp_node *tp_node; + CriuRenderNode *rd = NULL; char path[PATH_MAX]; unsigned char *buf; int minor; int len; int ret; + size_t image_size; + struct tp_node *tp_node; + struct drm_amdgpu_gem_list_handles list_handles_args = { 0 }; + struct drm_amdgpu_gem_list_handles_entry *list_handles_entries; + int num_bos; + + rd = xmalloc(sizeof(*rd)); + if (!rd) { + ret = -ENOMEM; + goto exit; + } + criu_render_node__init(rd); /* Get the topology node of the DRM device */ minor = minor(drm->st_rdev); + rd->drm_render_minor = minor; + rd->id = id; + + num_bos = 8; + list_handles_entries = xzalloc(sizeof(struct drm_amdgpu_gem_list_handles_entry) * num_bos); + list_handles_args.num_entries = num_bos; + list_handles_args.entries = (uintptr_t)list_handles_entries; + + ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES, &list_handles_args); + if (ret && errno == EINVAL) { + pr_info("This kernel appears not to have AMDGPU_GEM_LIST_HANDLES ioctl. 
Consider disabling Dmabuf IPC or updating your kernel.\n"); + list_handles_args.num_entries = 0; + } else if (ret) { + pr_perror("Failed to call bo info ioctl"); + goto exit; + } + + if (list_handles_args.num_entries > num_bos) { + num_bos = list_handles_args.num_entries; + xfree(list_handles_entries); + list_handles_entries = xzalloc(sizeof(struct drm_amdgpu_gem_list_handles_entry) * num_bos); + list_handles_args.num_entries = num_bos; + list_handles_args.entries = (uintptr_t)list_handles_entries; + ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES, &list_handles_args); + if (ret) { + pr_perror("Failed to call bo info ioctl"); + goto exit; + } + } else { + num_bos = list_handles_args.num_entries; + } + + rd->num_of_bos = num_bos; + ret = allocate_bo_entries(rd, num_bos); + if (ret) + goto exit; + + for (int i = 0; i < num_bos; i++) { + int num_vm_entries = 8; + struct drm_amdgpu_gem_vm_entry *vm_info_entries; + struct drm_amdgpu_gem_op vm_info_args = { 0 }; + DrmBoEntry *boinfo = rd->bo_entries[i]; + struct drm_amdgpu_gem_list_handles_entry handle_entry = list_handles_entries[i]; + union drm_amdgpu_gem_mmap mmap_args = { 0 }; + int dmabuf_fd; + uint32_t major, minor; + amdgpu_device_handle h_dev; + void *buffer = NULL; + char img_path[40]; + FILE *bo_contents_fp = NULL; + int device_fd; + + boinfo->size = handle_entry.size; + + boinfo->alloc_flags = handle_entry.alloc_flags; + boinfo->preferred_domains = handle_entry.preferred_domains; + boinfo->alignment = handle_entry.alignment; + boinfo->handle = handle_entry.gem_handle; + boinfo->is_import = (handle_entry.flags & AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT) || shared_bo_has_exporter(boinfo->handle); + + mmap_args.in.handle = boinfo->handle; + + if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_MMAP, &mmap_args) == -1) { + pr_perror("Error Failed to call mmap ioctl"); + ret = -1; + goto exit; + } + + boinfo->offset = mmap_args.out.addr_ptr; + + vm_info_entries = xzalloc(sizeof(struct drm_amdgpu_gem_vm_entry) * num_vm_entries); + vm_info_args.handle = handle_entry.gem_handle; + vm_info_args.num_entries = num_vm_entries; + vm_info_args.value = (uintptr_t)vm_info_entries; + vm_info_args.op = AMDGPU_GEM_OP_GET_MAPPING_INFO; + ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_OP, &vm_info_args); + if (ret) { + pr_perror("Failed to call vm info ioctl"); + goto exit; + } + + if (vm_info_args.num_entries > num_vm_entries) { + num_vm_entries = vm_info_args.num_entries; + xfree(vm_info_entries); + vm_info_entries = xzalloc(sizeof(struct drm_amdgpu_gem_vm_entry) * num_vm_entries); + vm_info_args.handle = handle_entry.gem_handle; + vm_info_args.num_entries = num_vm_entries; + vm_info_args.value = (uintptr_t)vm_info_entries; + vm_info_args.op = AMDGPU_GEM_OP_GET_MAPPING_INFO; + ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_OP, &vm_info_args); + if (ret) { + pr_perror("Failed to call vm info ioctl"); + goto exit; + } + } else { + num_vm_entries = vm_info_args.num_entries; + } + + boinfo->num_of_vms = num_vm_entries; + ret = allocate_vm_entries(boinfo, num_vm_entries); + if (ret) + goto exit; + + for (int j = 0; j < num_vm_entries; j++) { + DrmVmEntry *vminfo = boinfo->vm_entries[j]; + + boinfo->addr = vm_info_entries[j].addr; + vminfo->addr = vm_info_entries[j].addr; + vminfo->size = vm_info_entries[j].size; + vminfo->offset = vm_info_entries[j].offset; + vminfo->flags = vm_info_entries[j].flags; + } + + ret = amdgpu_device_initialize(fd, &major, &minor, &h_dev); + + device_fd = amdgpu_device_get_fd(h_dev); + + drmPrimeHandleToFD(device_fd, boinfo->handle, 0, &dmabuf_fd); + + 
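/*
 * Both queries above (DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES and the
 * AMDGPU_GEM_OP_GET_MAPPING_INFO op) use the common DRM "guess, then grow"
 * shape: submit a buffer, and if the kernel reports more entries than were
 * provided, reallocate and repeat once. A generic sketch with a
 * hypothetical query() callback standing in for the ioctl:
 */
#include <stdlib.h>

/* query() fills at most *count entries and updates *count to the total. */
static void *fetch_all(int (*query)(void *buf, unsigned *count),
		       size_t entry_size, unsigned *out_count)
{
	unsigned count = 8; /* first guess */
	void *buf = calloc(count, entry_size);

	if (!buf || query(buf, &count))
		goto err;

	if (count > 8) { /* kernel has more than the first guess */
		unsigned want = count;

		free(buf);
		buf = calloc(want, entry_size);
		if (!buf || query(buf, &count) || count > want)
			goto err; /* second failure or a racing grower */
	}
	*out_count = count;
	return buf;
err:
	free(buf);
	return NULL;
}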
snprintf(img_path, sizeof(img_path), IMG_DRM_PAGES_FILE, rd->id, rd->drm_render_minor, i); + bo_contents_fp = open_img_file(img_path, true, &image_size); + + posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), handle_entry.size); + + ret = sdma_copy_bo(dmabuf_fd, handle_entry.size, bo_contents_fp, buffer, handle_entry.size, h_dev, 0x1000, + SDMA_OP_VRAM_READ, false); + + if (dmabuf_fd != KFD_INVALID_FD) + close(dmabuf_fd); + + if (bo_contents_fp) + fclose(bo_contents_fp); + + ret = amdgpu_device_deinitialize(h_dev); + if (ret) + goto exit; + + xfree(vm_info_entries); + } + xfree(list_handles_entries); + + for (int i = 0; i < num_bos; i++) { + DrmBoEntry *boinfo = rd->bo_entries[i]; + + ret = record_shared_bo(boinfo->handle, boinfo->is_import); + if (ret) + goto exit; + } + tp_node = sys_get_node_by_render_minor(&src_topology, minor); if (!tp_node) { pr_err("Failed to find a device with minor number = %d\n", minor); @@ -80,21 +414,156 @@ int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm) } /* Get the GPU_ID of the DRM device */ - rd.gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id); - if (!rd.gpu_id) { - pr_err("Failed to find valid gpu_id for the device = %d\n", rd.gpu_id); + rd->gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id); + if (!rd->gpu_id) { + pr_err("Failed to find valid gpu_id for the device = %d\n", rd->gpu_id); return -ENODEV; } - len = criu_render_node__get_packed_size(&rd); + len = criu_render_node__get_packed_size(rd); buf = xmalloc(len); if (!buf) return -ENOMEM; - criu_render_node__pack(&rd, buf); + criu_render_node__pack(rd, buf); snprintf(path, sizeof(path), IMG_DRM_FILE, id); ret = write_img_file(path, buf, len); + xfree(buf); +exit: + free_e(rd); return ret; } + +int amdgpu_plugin_drm_restore_file(int fd, CriuRenderNode *rd) +{ + int ret = 0; + bool retry_needed = false; + uint32_t major, minor; + amdgpu_device_handle h_dev; + int device_fd; + int *dmabufs = xzalloc(sizeof(int) * rd->num_of_bos); + + ret = amdgpu_device_initialize(fd, &major, &minor, &h_dev); + if (ret) { + pr_info("Error in init amdgpu device\n"); + goto exit; + } + + device_fd = amdgpu_device_get_fd(h_dev); + + for (int i = 0; i < rd->num_of_bos; i++) { + DrmBoEntry *boinfo = rd->bo_entries[i]; + int dmabuf_fd = -1; + uint32_t handle; + struct drm_gem_change_handle change_args = { 0 }; + union drm_amdgpu_gem_mmap mmap_args = { 0 }; + struct drm_amdgpu_gem_va va_args = { 0 }; + int fd_id; + + if (work_already_completed(boinfo->handle, rd->drm_render_minor)) { + continue; + } else if (boinfo->handle != -1) { + if (boinfo->is_import) { + fd_id = amdgpu_id_for_handle(boinfo->handle); + if (fd_id == -1) { + retry_needed = true; + continue; + } + dmabuf_fd = fdstore_get(fd_id); + } + } + + if (boinfo->is_import) { + drmPrimeFDToHandle(device_fd, dmabuf_fd, &handle); + } else { + union drm_amdgpu_gem_create create_args = { 0 }; + + create_args.in.bo_size = boinfo->size; + create_args.in.alignment = boinfo->alignment; + create_args.in.domains = boinfo->preferred_domains; + create_args.in.domain_flags = boinfo->alloc_flags; + + if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &create_args) == -1) { + pr_perror("Error Failed to call create ioctl"); + ret = -1; + goto exit; + } + handle = create_args.out.handle; + + drmPrimeHandleToFD(device_fd, handle, 0, &dmabuf_fd); + } + + change_args.handle = handle; + change_args.new_handle = boinfo->handle; + + if (drmIoctl(fd, DRM_IOCTL_GEM_CHANGE_HANDLE, &change_args) == -1) { + pr_perror("Error Failed to call change ioctl; check if the 
kernel has DRM_IOCTL_GEM_CHANGE_HANDLE support"); + ret = -1; + goto exit; + } + + if (!boinfo->is_import) + store_dmabuf_fd(boinfo->handle, dmabuf_fd); + + dmabufs[i] = dmabuf_fd; + + ret = record_completed_work(boinfo->handle, rd->drm_render_minor); + if (ret) + goto exit; + + mmap_args.in.handle = boinfo->handle; + + if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_MMAP, &mmap_args) == -1) { + pr_perror("Error Failed to call mmap ioctl"); + ret = -1; + goto exit; + } + + for (int j = 0; j < boinfo->num_of_vms; j++) { + DrmVmEntry *vminfo = boinfo->vm_entries[j]; + + va_args.handle = boinfo->handle; + va_args.operation = AMDGPU_VA_OP_MAP; + va_args.flags = vminfo->flags; + va_args.va_address = vminfo->addr; + va_args.offset_in_bo = vminfo->offset; + va_args.map_size = vminfo->size; + + if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_VA, &va_args) == -1) { + pr_perror("Error Failed to call gem va ioctl"); + ret = -1; + goto exit; + } + } + + ret = save_vma_updates(boinfo->offset, boinfo->addr, mmap_args.out.addr_ptr, fd); + if (ret < 0) + goto exit; + } + + if (ret) { + pr_info("Error in deinit amdgpu device\n"); + goto exit; + } + + ret = record_completed_work(-1, rd->drm_render_minor); + if (ret) + goto exit; + + ret = amdgpu_device_deinitialize(h_dev); + + if (rd->num_of_bos > 0) { + ret = restore_bo_contents_drm(rd->drm_render_minor, rd, fd, dmabufs); + if (ret) + goto exit; + } + +exit: + if (ret < 0) + return ret; + xfree(dmabufs); + + return retry_needed; +} diff --git a/plugins/amdgpu/amdgpu_plugin_drm.h b/plugins/amdgpu/amdgpu_plugin_drm.h index 6f0c1a9a6..c766def56 100644 --- a/plugins/amdgpu/amdgpu_plugin_drm.h +++ b/plugins/amdgpu/amdgpu_plugin_drm.h @@ -24,5 +24,17 @@ int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *drm); */ int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm); +int amdgpu_plugin_drm_restore_file(int fd, CriuRenderNode *rd); + +int amdgpu_plugin_drm_unpause_file(int fd); + +int amdgpu_id_for_handle(int handle); + +int store_dmabuf_fd(int handle, int fd); + +int get_gem_handle(amdgpu_device_handle h_dev, int dmabuf_fd); + +int save_vma_updates(uint64_t offset, uint64_t addr, uint64_t restored_offset, int gpu_id); + #endif /* __AMDGPU_PLUGIN_DRM_H__ */ diff --git a/plugins/amdgpu/amdgpu_plugin_topology.c b/plugins/amdgpu/amdgpu_plugin_topology.c index 5b4396a0c..730f2e028 100644 --- a/plugins/amdgpu/amdgpu_plugin_topology.c +++ b/plugins/amdgpu/amdgpu_plugin_topology.c @@ -45,7 +45,7 @@ bool kfd_capability_check = true; */ int fd_next = -1; -static int open_drm_render_device(int minor) +int open_drm_render_device(int minor) { char path[128]; int fd, ret_fd; diff --git a/plugins/amdgpu/amdgpu_plugin_topology.h b/plugins/amdgpu/amdgpu_plugin_topology.h index c890e3dda..e19f8e7ce 100644 --- a/plugins/amdgpu/amdgpu_plugin_topology.h +++ b/plugins/amdgpu/amdgpu_plugin_topology.h @@ -118,6 +118,7 @@ struct tp_node *sys_get_node_by_gpu_id(const struct tp_system *sys, const uint32 struct tp_node *sys_get_node_by_render_minor(const struct tp_system *sys, const int drm_render_minor); struct tp_node *sys_get_node_by_index(const struct tp_system *sys, uint32_t index); +int open_drm_render_device(int minor); int node_get_drm_render_device(struct tp_node *node); void sys_close_drm_render_devices(struct tp_system *sys); diff --git a/plugins/amdgpu/amdgpu_plugin_util.c b/plugins/amdgpu/amdgpu_plugin_util.c index a165fc9cd..592562474 100644 --- a/plugins/amdgpu/amdgpu_plugin_util.c +++ b/plugins/amdgpu/amdgpu_plugin_util.c @@ -37,9 +37,11 @@ #include "amdgpu_drm.h" 
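/*
 * record_dumped_fd(), defined just below, dup()s the descriptor it is
 * given: the plugin needs its own reference that stays valid until the
 * late DUMP_DEVICES_LATE hook, after the caller's copy may have been
 * closed. A minimal stand-alone form of that dup-and-stash pattern
 * (struct kept_fd and keep_fd() are illustrative names):
 */
#include <stdlib.h>
#include <unistd.h>

struct kept_fd {
	struct kept_fd *next;
	int fd;
};

static int keep_fd(struct kept_fd **list, int fd)
{
	struct kept_fd *k;
	int copy = dup(fd); /* survives the original being closed */

	if (copy < 0)
		return -1;
	k = malloc(sizeof(*k));
	if (!k) {
		close(copy);
		return -1;
	}
	k->fd = copy;
	k->next = *list;
	*list = k;
	return 0;
}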
#include "amdgpu_plugin_util.h" #include "amdgpu_plugin_topology.h" +#include "amdgpu_plugin_drm.h" -/* Tracks number of device files that need to be checkpointed */ -static int dev_file_cnt = 0; +static LIST_HEAD(dumped_fds); +static LIST_HEAD(shared_bos); +static LIST_HEAD(completed_work); /* Helper structures to encode device topology of SRC and DEST platforms */ struct tp_system src_topology; @@ -49,23 +51,145 @@ struct tp_system dest_topology; struct device_maps checkpoint_maps; struct device_maps restore_maps; -bool checkpoint_is_complete() +int record_dumped_fd(int fd, bool is_drm) { - return (dev_file_cnt == 0); + int newfd = dup(fd); + + if (newfd < 0) + return newfd; + struct dumped_fd *st = malloc(sizeof(struct dumped_fd)); + if (!st) + return -1; + st->fd = newfd; + st->is_drm = is_drm; + list_add(&st->l, &dumped_fds); + + return 0; } -void decrement_checkpoint_count() +struct list_head *get_dumped_fds() { - dev_file_cnt--; + return &dumped_fds; } -void init_gpu_count(struct tp_system *topo) +bool shared_bo_has_exporter(int handle) { - if (dev_file_cnt != 0) - return; + struct shared_bo *bo; - /* We add ONE to include checkpointing of KFD device */ - dev_file_cnt = 1 + topology_gpu_count(topo); + if (handle == -1) + return false; + + list_for_each_entry(bo, &shared_bos, l) { + if (bo->handle == handle) { + return bo->has_exporter; + } + } + + return false; +} + +int record_shared_bo(int handle, bool is_imported) +{ + struct shared_bo *bo; + + if (handle == -1) + return 0; + + list_for_each_entry(bo, &shared_bos, l) { + if (bo->handle == handle) { + return 0; + } + } + bo = malloc(sizeof(struct shared_bo)); + if (!bo) + return -1; + bo->handle = handle; + bo->has_exporter = !is_imported; + list_add(&bo->l, &shared_bos); + + return 0; +} + +int handle_for_shared_bo_fd(int fd) +{ + struct dumped_fd *df; + int trial_handle; + amdgpu_device_handle h_dev; + uint32_t major, minor; + struct shared_bo *bo; + + list_for_each_entry(df, &dumped_fds, l) { + /* see if the gem handle for fd using the hdev for df->fd is the + same as bo->handle. 
*/ + + if (!df->is_drm) { + continue; + } + + if (amdgpu_device_initialize(df->fd, &major, &minor, &h_dev)) { + pr_err("Failed to initialize amdgpu device\n"); + continue; + } + + trial_handle = get_gem_handle(h_dev, fd); + if (trial_handle < 0) + continue; + + list_for_each_entry(bo, &shared_bos, l) { + if (bo->handle == trial_handle) + return trial_handle; + } + + amdgpu_device_deinitialize(h_dev); + } + + return -1; +} + +int record_completed_work(int handle, int id) +{ + struct restore_completed_work *work; + + work = malloc(sizeof(struct restore_completed_work)); + if (!work) + return -1; + work->handle = handle; + work->id = id; + list_add(&work->l, &completed_work); + + return 0; +} + +bool work_already_completed(int handle, int id) +{ + struct restore_completed_work *work; + + list_for_each_entry(work, &completed_work, l) { + if (work->handle == handle && work->id == id) { + return true; + } + } + + return false; +} + +void clear_restore_state() +{ + while (!list_empty(&completed_work)) { + struct restore_completed_work *st = list_first_entry(&completed_work, struct restore_completed_work, l); + list_del(&st->l); + free(st); + } +} + +void clear_dumped_fds() +{ + while (!list_empty(&dumped_fds)) { + struct dumped_fd *st = list_first_entry(&dumped_fds, struct dumped_fd, l); + list_del(&st->l); + close(st->fd); + free(st); + } } int read_fp(FILE *fp, void *buf, const size_t buf_len) diff --git a/plugins/amdgpu/amdgpu_plugin_util.h b/plugins/amdgpu/amdgpu_plugin_util.h index aacca3a28..f5f752d0b 100644 --- a/plugins/amdgpu/amdgpu_plugin_util.h +++ b/plugins/amdgpu/amdgpu_plugin_util.h @@ -1,6 +1,8 @@ #ifndef __AMDGPU_PLUGIN_UTIL_H__ #define __AMDGPU_PLUGIN_UTIL_H__ +#include + #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif @@ -51,14 +53,18 @@ /* Name of file having serialized data of DRM device */ #define IMG_DRM_FILE "amdgpu-renderD-%d.img" +/* Name of file having serialized data of dmabuf meta */ +#define IMG_DMABUF_FILE "amdgpu-dmabuf_%d.img" + /* Name of file having serialized data of DRM device buffer objects (BOs) */ -#define IMG_DRM_PAGES_FILE "amdgpu-drm-pages-%d-%04x.img" +#define IMG_DRM_PAGES_FILE "amdgpu-drm-pages-%d-%d-%04x.img" /* Helper macros to Checkpoint and Restore a ROCm file */ #define HSAKMT_SHM_PATH "/dev/shm/hsakmt_shared_mem" #define HSAKMT_SHM "/hsakmt_shared_mem" #define HSAKMT_SEM_PATH "/dev/shm/sem.hsakmt_semaphore" #define HSAKMT_SEM "hsakmt_semaphore" +#define DMABUF_LINK "/dmabuf" /* Help macros to build sDMA command packets */ #define SDMA_PACKET(op, sub_op, e) ((((e)&0xFFFF) << 16) | (((sub_op)&0xFF) << 8) | (((op)&0xFF) << 0)) @@ -73,6 +79,24 @@ enum sdma_op_type { SDMA_OP_VRAM_WRITE, }; +struct dumped_fd { + struct list_head l; + int fd; + bool is_drm; +}; + +struct shared_bo { + struct list_head l; + int handle; + bool has_exporter; +}; + +struct restore_completed_work { + struct list_head l; + int handle; + int id; +}; + /* Helper structures to encode device topology of SRC and DEST platforms */ extern struct tp_system src_topology; extern struct tp_system dest_topology; @@ -97,10 +121,25 @@ int read_file(const char *file_path, void *buf, const size_t buf_len); int write_img_file(char *path, const void *buf, const size_t buf_len); FILE *open_img_file(char *path, bool write, size_t *size); -bool checkpoint_is_complete(); -void decrement_checkpoint_count(); -void init_gpu_count(struct tp_system *topology); +int record_dumped_fd(int fd, bool is_drm); +struct list_head *get_dumped_fds(); +void clear_dumped_fds(); + +bool shared_bo_has_exporter(int 
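/*
 * record_completed_work()/work_already_completed() below form a simple
 * idempotency ledger, so a retried restore pass skips (handle, id) pairs
 * it already processed. A minimal stand-alone form of the same ledger:
 */
#include <stdbool.h>
#include <stdlib.h>

struct done_rec {
	struct done_rec *next;
	int handle;
	int id;
};

static bool ledger_has(struct done_rec *head, int handle, int id)
{
	for (; head; head = head->next)
		if (head->handle == handle && head->id == id)
			return true;
	return false;
}

static int ledger_add(struct done_rec **head, int handle, int id)
{
	struct done_rec *r = malloc(sizeof(*r));

	if (!r)
		return -1;
	r->handle = handle;
	r->id = id;
	r->next = *head;
	*head = r;
	return 0;
}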
handle); +int record_shared_bo(int handle, bool is_imported); +int handle_for_shared_bo_fd(int dmabuf_fd); + +int record_completed_work(int handle, int id); +bool work_already_completed(int handle, int id); + +void clear_restore_state(); void print_kfd_bo_stat(int bo_cnt, struct kfd_criu_bo_bucket *bo_list); +int sdma_copy_bo(int shared_fd, uint64_t size, FILE *storage_fp, + void *buffer, size_t buffer_size, amdgpu_device_handle h_dev, + uint64_t max_copy_size, enum sdma_op_type type, bool do_not_free); + +int serve_out_dmabuf_fd(int handle, int fd); + #endif /* __AMDGPU_PLUGIN_UTIL_H__ */ diff --git a/plugins/amdgpu/amdgpu_socket_utils.c b/plugins/amdgpu/amdgpu_socket_utils.c new file mode 100644 index 000000000..c8bf6d1ba --- /dev/null +++ b/plugins/amdgpu/amdgpu_socket_utils.c @@ -0,0 +1,320 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "amdgpu_socket_utils.h" +#include "criu-log.h" +#include "common/scm.h" +#include "fdstore.h" +#include "util-pie.h" +#include "util.h" + +int parallel_socket_addr_len; +struct sockaddr_un parallel_socket_addr; +int parallel_socket_id = 0; + +static void amdgpu_socket_name_gen(struct sockaddr_un *addr, int *len) +{ + addr->sun_family = AF_UNIX; + snprintf(addr->sun_path, UNIX_PATH_MAX, "x/criu-amdgpu-parallel-%s", criu_run_id); + *len = SUN_LEN(addr); + *addr->sun_path = '\0'; +} + +int install_parallel_sock(void) +{ + int ret = 0; + int sock_fd; + + sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (sock_fd < 0) { + pr_perror("socket creation failed"); + return -1; + } + + amdgpu_socket_name_gen(¶llel_socket_addr, ¶llel_socket_addr_len); + ret = bind(sock_fd, (struct sockaddr *)¶llel_socket_addr, parallel_socket_addr_len); + if (ret < 0) { + pr_perror("bind failed"); + goto err; + } + + ret = listen(sock_fd, SOMAXCONN); + if (ret < 0) { + pr_perror("listen failed"); + goto err; + } + + parallel_socket_id = fdstore_add(sock_fd); + if (parallel_socket_id < 0) { + ret = -1; + goto err; + } +err: + close(sock_fd); + return ret; +} + +void parallel_restore_bo_add(int dmabuf_fd, int gpu_id, uint64_t size, uint64_t offset, + parallel_restore_cmd *restore_cmd) +{ + parallel_restore_entry *restore_entry = &restore_cmd->entries[restore_cmd->cmd_head.entry_num]; + restore_entry->gpu_id = gpu_id; + restore_entry->write_id = restore_cmd->cmd_head.fd_write_num; + restore_entry->write_offset = 0; + restore_entry->read_offset = offset; + restore_entry->size = size; + + restore_cmd->fds_write[restore_cmd->cmd_head.fd_write_num] = dmabuf_fd; + + restore_cmd->cmd_head.entry_num += 1; + restore_cmd->cmd_head.fd_write_num += 1; +} + +void parallel_restore_gpu_id_add(int gpu_id, int minor, parallel_restore_cmd *restore_cmd) +{ + restore_cmd->gpu_ids[restore_cmd->cmd_head.gpu_num] = (parallel_gpu_info){ gpu_id, minor }; + restore_cmd->cmd_head.gpu_num += 1; +} + +static int send_metadata(int sock_fd, parallel_restore_cmd *restore_cmd) +{ + if (send(sock_fd, &restore_cmd->cmd_head, sizeof(parallel_restore_cmd_head), 0) < 0) { + pr_perror("Send parallel restore command head fail"); + return -1; + } + return 0; +} + +static int send_gpu_ids(int sock_fd, parallel_restore_cmd *restore_cmd) +{ + if (send(sock_fd, restore_cmd->gpu_ids, restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info), 0) < 0) { + pr_perror("Send GPU ids of parallel restore command fail"); + return -1; + } + return 0; +} + +static int send_cmds(int sock_fd, parallel_restore_cmd *restore_cmd) +{ + if (send(sock_fd, restore_cmd->entries, 
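/*
 * amdgpu_socket_name_gen() above builds an abstract-namespace AF_UNIX
 * address: the length is computed with SUN_LEN() first, then the leading
 * byte of sun_path is set to '\0', so no filesystem entry is created and
 * the name vanishes with its last user. A stand-alone version (the real
 * code tags the name with criu_run_id; "tag" here is a placeholder):
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

static int abstract_listen(const char *tag)
{
	struct sockaddr_un addr = { .sun_family = AF_UNIX };
	socklen_t len;
	int fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);

	if (fd < 0)
		return -1;

	/* leading 'x' is a placeholder, overwritten by the '\0' below */
	snprintf(addr.sun_path, sizeof(addr.sun_path), "x/criu-amdgpu-%s", tag);
	len = SUN_LEN(&addr); /* measure before zeroing the first byte */
	addr.sun_path[0] = '\0'; /* abstract: no filesystem node */

	if (bind(fd, (struct sockaddr *)&addr, len) || listen(fd, SOMAXCONN)) {
		close(fd);
		return -1;
	}
	return fd;
}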
restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry), 0) < 0) { + pr_perror("Send parallel restore command fail"); + return -1; + } + return 0; +} + +static int send_dmabuf_fds(int sock_fd, parallel_restore_cmd *restore_cmd) +{ + if (send_fds(sock_fd, NULL, 0, restore_cmd->fds_write, restore_cmd->cmd_head.fd_write_num, 0, 0) < 0) { + pr_perror("Send dmabuf fds fail"); + return -1; + } + return 0; +} + +int send_parallel_restore_cmd(parallel_restore_cmd *restore_cmd) +{ + int sock_fd; + int ret = 0; + + sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (sock_fd < 0) { + pr_perror("Socket creation failed"); + return -1; + } + + ret = connect(sock_fd, (struct sockaddr *)¶llel_socket_addr, parallel_socket_addr_len); + if (ret < 0) { + pr_perror("Connect failed"); + goto err; + } + + ret = send_metadata(sock_fd, restore_cmd); + if (ret) { + goto err; + } + + ret = send_gpu_ids(sock_fd, restore_cmd); + if (ret) { + goto err; + } + + ret = send_cmds(sock_fd, restore_cmd); + if (ret) { + goto err; + } + + ret = send_dmabuf_fds(sock_fd, restore_cmd); + +err: + close(sock_fd); + return ret; +} + +int init_parallel_restore_cmd(int num, int id, int gpu_num, parallel_restore_cmd *restore_cmd) +{ + restore_cmd->cmd_head.id = id; + restore_cmd->cmd_head.fd_write_num = 0; + restore_cmd->cmd_head.entry_num = 0; + restore_cmd->cmd_head.gpu_num = 0; + + restore_cmd->gpu_ids = xzalloc(gpu_num * sizeof(parallel_gpu_info)); + if (!restore_cmd->gpu_ids) + return -ENOMEM; + restore_cmd->fds_write = xzalloc(num * sizeof(int)); + if (!restore_cmd->fds_write) + return -ENOMEM; + restore_cmd->entries = xzalloc(num * sizeof(parallel_restore_entry)); + if (!restore_cmd->entries) + return -ENOMEM; + return 0; +} + +void free_parallel_restore_cmd(parallel_restore_cmd *restore_cmd) +{ + if (restore_cmd->gpu_ids) + xfree(restore_cmd->gpu_ids); + if (restore_cmd->fds_write) + xfree(restore_cmd->fds_write); + if (restore_cmd->entries) + xfree(restore_cmd->entries); +} + +static int init_parallel_restore_cmd_by_head(parallel_restore_cmd *restore_cmd) +{ + restore_cmd->gpu_ids = xzalloc(restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info)); + if (!restore_cmd->gpu_ids) + return -ENOMEM; + restore_cmd->fds_write = xzalloc(restore_cmd->cmd_head.fd_write_num * sizeof(int)); + if (!restore_cmd->fds_write) + return -ENOMEM; + restore_cmd->entries = xzalloc(restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry)); + if (!restore_cmd->entries) + return -ENOMEM; + return 0; +} + +static int check_quit_cmd(parallel_restore_cmd *restore_cmd) +{ + return restore_cmd->cmd_head.fd_write_num == 0; +} + +static int recv_metadata(int client_fd, parallel_restore_cmd *restore_cmd) +{ + if (recv(client_fd, &restore_cmd->cmd_head, sizeof(parallel_restore_cmd_head), 0) < 0) { + pr_perror("Recv parallel restore command head fail"); + return -1; + } + return 0; +} + +static int recv_cmds(int client_fd, parallel_restore_cmd *restore_cmd) +{ + if (recv(client_fd, restore_cmd->entries, restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry), 0) < 0) { + pr_perror("Recv parallel restore command fail"); + return -1; + } + return 0; +} + +static int recv_gpu_ids(int sock_fd, parallel_restore_cmd *restore_cmd) +{ + if (recv(sock_fd, restore_cmd->gpu_ids, restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info), 0) < 0) { + pr_perror("Send GPU ids of parallel restore command fail"); + return -1; + } + return 0; +} + +static int recv_dmabuf_fds(int client_fd, parallel_restore_cmd *restore_cmd) +{ + if 
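/*
 * send_fds()/recv_fds() used above are CRIU helpers around SCM_RIGHTS.
 * For reference, the underlying sendmsg() form for passing one descriptor
 * across an AF_UNIX socket (a self-contained sketch, not the helpers'
 * actual implementation):
 */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static int send_one_fd(int sock, int fd)
{
	char dummy = 0;
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	union { /* union guarantees cmsghdr alignment for the buffer */
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} u;
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = u.buf,
		.msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *c;

	memset(&u, 0, sizeof(u));
	c = CMSG_FIRSTHDR(&msg);
	c->cmsg_level = SOL_SOCKET;
	c->cmsg_type = SCM_RIGHTS;
	c->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(c), &fd, sizeof(int));

	return sendmsg(sock, &msg, 0) == 1 ? 0 : -1;
}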
(recv_fds(client_fd, restore_cmd->fds_write, restore_cmd->cmd_head.fd_write_num, 0, 0) < 0) { + pr_perror("Recv dmabuf fds fail"); + return -1; + } + return 0; +} + +int recv_parallel_restore_cmd(parallel_restore_cmd *restore_cmd) +{ + int sock_fd, client_fd; + int ret = 0; + + sock_fd = fdstore_get(parallel_socket_id); + if (sock_fd < 0) + return -1; + + client_fd = accept(sock_fd, NULL, NULL); + if (client_fd < 0) { + ret = client_fd; + goto err_accept; + } + + ret = recv_metadata(client_fd, restore_cmd); + if (ret) { + goto err; + } + + // Return 1 to quit + if (check_quit_cmd(restore_cmd)) { + ret = 1; + goto err; + } + + ret = init_parallel_restore_cmd_by_head(restore_cmd); + if (ret) { + goto err; + } + + ret = recv_gpu_ids(client_fd, restore_cmd); + if (ret) { + goto err; + } + + ret = recv_cmds(client_fd, restore_cmd); + if (ret) { + goto err; + } + + ret = recv_dmabuf_fds(client_fd, restore_cmd); + +err: + close(client_fd); +err_accept: + close(sock_fd); + return ret; +} + +int close_parallel_restore_server(void) +{ + int sock_fd; + int ret = 0; + parallel_restore_cmd_head cmd_head; + + sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (sock_fd < 0) { + pr_perror("Socket creation failed"); + return -1; + } + + ret = connect(sock_fd, (struct sockaddr *)¶llel_socket_addr, parallel_socket_addr_len); + if (ret < 0) { + pr_perror("Connect failed"); + goto err; + } + + memset(&cmd_head, 0, sizeof(parallel_restore_cmd_head)); + if (send(sock_fd, &cmd_head, sizeof(parallel_restore_cmd_head), 0) < 0) { + pr_perror("Send parallel restore command head fail"); + return -1; + } + +err: + close(sock_fd); + return ret; +} \ No newline at end of file diff --git a/plugins/amdgpu/amdgpu_socket_utils.h b/plugins/amdgpu/amdgpu_socket_utils.h new file mode 100644 index 000000000..d7200c6bd --- /dev/null +++ b/plugins/amdgpu/amdgpu_socket_utils.h @@ -0,0 +1,54 @@ +#ifndef __KFD_PLUGIN_AMDGPU_SOCKET_UTILS_H__ +#define __KFD_PLUGIN_AMDGPU_SOCKET_UTILS_H__ + +typedef struct { + int id; + int fd_write_num; /* The number of buffer objects to be restored. */ + int entry_num; /* The number of restore commands.*/ + int gpu_num; +} parallel_restore_cmd_head; + +typedef struct { + int gpu_id; + int minor; +} parallel_gpu_info; + +typedef struct { + int gpu_id; + int write_id; + uint64_t read_offset; + uint64_t write_offset; + uint64_t size; +} parallel_restore_entry; + +typedef struct { + parallel_restore_cmd_head cmd_head; + int *fds_write; + parallel_gpu_info *gpu_ids; + parallel_restore_entry *entries; +} parallel_restore_cmd; + +/* + * For parallel_restore, a background thread in the main CRIU process is used to restore the GPU + * buffer object. However, initially, the ownership of these buffer objects and the metadata for + * restoration are all with the target process. Therefore, we introduce a series of functions to + * help the target process send these tasks to the main CRIU process. 
+ */ +int init_parallel_restore_cmd(int num, int id, int gpu_num, parallel_restore_cmd *restore_cmd); + +void free_parallel_restore_cmd(parallel_restore_cmd *restore_cmd); + +int install_parallel_sock(void); + +int send_parallel_restore_cmd(parallel_restore_cmd *restore_cmd); + +int recv_parallel_restore_cmd(parallel_restore_cmd *restore_cmd); + +void parallel_restore_bo_add(int dmabuf_fd, int gpu_id, uint64_t size, uint64_t offset, + parallel_restore_cmd *restore_cmd); + +void parallel_restore_gpu_id_add(int gpu_id, int minor, parallel_restore_cmd *restore_cmd); + +int close_parallel_restore_server(void); + +#endif \ No newline at end of file diff --git a/plugins/amdgpu/criu-amdgpu.proto b/plugins/amdgpu/criu-amdgpu.proto index 078b67650..7682a8f21 100644 --- a/plugins/amdgpu/criu-amdgpu.proto +++ b/plugins/amdgpu/criu-amdgpu.proto @@ -46,6 +46,7 @@ message kfd_bo_entry { required uint64 offset = 3; required uint32 alloc_flags = 4; required uint32 gpu_id = 5; + required uint32 handle = 6; } message criu_kfd { @@ -61,6 +62,34 @@ message criu_kfd { required bytes priv_data = 10; } +message drm_bo_entry { + required uint64 addr = 1; + required uint64 size = 2; + required uint64 offset = 3; + required uint64 alloc_flags = 4; + required uint64 alignment = 5; + required uint32 preferred_domains = 6; + required uint32 handle = 7; + required uint32 is_import = 8; + required uint32 num_of_vms = 9; + repeated drm_vm_entry vm_entries = 10; +} + +message drm_vm_entry { + required uint64 addr = 1; + required uint64 size = 2; + required uint64 offset = 3; + required uint64 flags = 4; +} + message criu_render_node { required uint32 gpu_id = 1; + required uint32 id = 2; + required uint32 drm_render_minor = 3; + required uint64 num_of_bos = 4; + repeated drm_bo_entry bo_entries = 5; +} + +message criu_dmabuf_node { + required uint32 gem_handle = 1; } diff --git a/plugins/amdgpu/drm.h b/plugins/amdgpu/drm.h new file mode 100644 index 000000000..3cd5cf15e --- /dev/null +++ b/plugins/amdgpu/drm.h @@ -0,0 +1,1476 @@ +/* + * Header for the Direct Rendering Manager + * + * Author: Rickard E. (Rik) Faith + * + * Acknowledgments: + * Dec 1999, Richard Henderson , move to generic cmpxchg. + */ + +/* + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#ifndef _DRM_H_
+#define _DRM_H_
+
+#if defined(__KERNEL__)
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+typedef unsigned int drm_handle_t;
+
+#elif defined(__linux__)
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+typedef unsigned int drm_handle_t;
+
+#else /* One of the BSDs */
+
+#include <stdint.h>
+#include <sys/ioccom.h>
+#include <sys/types.h>
+typedef int8_t __s8;
+typedef uint8_t __u8;
+typedef int16_t __s16;
+typedef uint16_t __u16;
+typedef int32_t __s32;
+typedef uint32_t __u32;
+typedef int64_t __s64;
+typedef uint64_t __u64;
+typedef size_t __kernel_size_t;
+typedef unsigned long drm_handle_t;
+
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_NAME "drm" /**< Name in kernel, /dev, and /proc */
+#define DRM_MIN_ORDER 5 /**< At least 2^5 bytes = 32 bytes */
+#define DRM_MAX_ORDER 22 /**< Up to 2^22 bytes = 4MB */
+#define DRM_RAM_PERCENT 10 /**< How much system ram can we lock? */
+
+#define _DRM_LOCK_HELD 0x80000000U /**< Hardware lock is held */
+#define _DRM_LOCK_CONT 0x40000000U /**< Hardware lock is contended */
+#define _DRM_LOCK_IS_HELD(lock) ((lock) & _DRM_LOCK_HELD)
+#define _DRM_LOCK_IS_CONT(lock) ((lock) & _DRM_LOCK_CONT)
+#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT))
+
+typedef unsigned int drm_context_t;
+typedef unsigned int drm_drawable_t;
+typedef unsigned int drm_magic_t;
+
+/*
+ * Cliprect.
+ *
+ * \warning: If you change this structure, make sure you change
+ * XF86DRIClipRectRec in the server as well
+ *
+ * \note KW: Actually it's illegal to change either for
+ * backwards-compatibility reasons.
+ */
+struct drm_clip_rect {
+	unsigned short x1;
+	unsigned short y1;
+	unsigned short x2;
+	unsigned short y2;
+};
+
+/*
+ * Drawable information.
+ */
+struct drm_drawable_info {
+	unsigned int num_rects;
+	struct drm_clip_rect *rects;
+};
+
+/*
+ * Texture region,
+ */
+struct drm_tex_region {
+	unsigned char next;
+	unsigned char prev;
+	unsigned char in_use;
+	unsigned char padding;
+	unsigned int age;
+};
+
+/*
+ * Hardware lock.
+ *
+ * The lock structure is a simple cache-line aligned integer. To avoid
+ * processor bus contention on a multiprocessor system, there should not be any
+ * other data stored in the same cache line.
+ */
+struct drm_hw_lock {
+	__volatile__ unsigned int lock; /**< lock variable */
+	char padding[60]; /**< Pad to cache line */
+};
+
+/*
+ * DRM_IOCTL_VERSION ioctl argument type.
+ *
+ * \sa drmGetVersion().
+ */
+struct drm_version {
+	int version_major; /**< Major version */
+	int version_minor; /**< Minor version */
+	int version_patchlevel; /**< Patch level */
+	__kernel_size_t name_len; /**< Length of name buffer */
+	char __user *name; /**< Name of driver */
+	__kernel_size_t date_len; /**< Length of date buffer */
+	char __user *date; /**< User-space buffer to hold date */
+	__kernel_size_t desc_len; /**< Length of desc buffer */
+	char __user *desc; /**< User-space buffer to hold desc */
+};
+
+/*
+ * DRM_IOCTL_GET_UNIQUE ioctl argument type.
+ *
+ * \sa drmGetBusid() and drmSetBusId().
+ */
+struct drm_unique {
+	__kernel_size_t unique_len; /**< Length of unique */
+	char __user *unique; /**< Unique name for driver instantiation */
+};
+
+struct drm_list {
+	int count; /**< Length of user-space structures */
+	struct drm_version __user *version;
+};
+
+struct drm_block {
+	int unused;
+};
+
+/*
+ * DRM_IOCTL_CONTROL ioctl argument type.
+ *
+ * \sa drmCtlInstHandler() and drmCtlUninstHandler().
+ */ +struct drm_control { + enum { + DRM_ADD_COMMAND, + DRM_RM_COMMAND, + DRM_INST_HANDLER, + DRM_UNINST_HANDLER + } func; + int irq; +}; + +/* + * Type of memory to map. + */ +enum drm_map_type { + _DRM_FRAME_BUFFER = 0, /**< WC (no caching), no core dump */ + _DRM_REGISTERS = 1, /**< no caching, no core dump */ + _DRM_SHM = 2, /**< shared, cached */ + _DRM_AGP = 3, /**< AGP/GART */ + _DRM_SCATTER_GATHER = 4, /**< Scatter/gather memory for PCI DMA */ + _DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */ +}; + +/* + * Memory mapping flags. + */ +enum drm_map_flags { + _DRM_RESTRICTED = 0x01, /**< Cannot be mapped to user-virtual */ + _DRM_READ_ONLY = 0x02, + _DRM_LOCKED = 0x04, /**< shared, cached, locked */ + _DRM_KERNEL = 0x08, /**< kernel requires access */ + _DRM_WRITE_COMBINING = 0x10, /**< use write-combining if available */ + _DRM_CONTAINS_LOCK = 0x20, /**< SHM page that contains lock */ + _DRM_REMOVABLE = 0x40, /**< Removable mapping */ + _DRM_DRIVER = 0x80 /**< Managed by driver */ +}; + +struct drm_ctx_priv_map { + unsigned int ctx_id; /**< Context requesting private mapping */ + void *handle; /**< Handle of map */ +}; + +/* + * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls + * argument type. + * + * \sa drmAddMap(). + */ +struct drm_map { + unsigned long offset; /**< Requested physical address (0 for SAREA)*/ + unsigned long size; /**< Requested physical size (bytes) */ + enum drm_map_type type; /**< Type of memory to map */ + enum drm_map_flags flags; /**< Flags */ + void *handle; /**< User-space: "Handle" to pass to mmap() */ + /**< Kernel-space: kernel-virtual address */ + int mtrr; /**< MTRR slot used */ + /* Private data */ +}; + +/* + * DRM_IOCTL_GET_CLIENT ioctl argument type. + */ +struct drm_client { + int idx; /**< Which client desired? */ + int auth; /**< Is client authenticated? */ + unsigned long pid; /**< Process ID */ + unsigned long uid; /**< User ID */ + unsigned long magic; /**< Magic */ + unsigned long iocs; /**< Ioctl count */ +}; + +enum drm_stat_type { + _DRM_STAT_LOCK, + _DRM_STAT_OPENS, + _DRM_STAT_CLOSES, + _DRM_STAT_IOCTLS, + _DRM_STAT_LOCKS, + _DRM_STAT_UNLOCKS, + _DRM_STAT_VALUE, /**< Generic value */ + _DRM_STAT_BYTE, /**< Generic byte counter (1024bytes/K) */ + _DRM_STAT_COUNT, /**< Generic non-byte counter (1000/k) */ + + _DRM_STAT_IRQ, /**< IRQ */ + _DRM_STAT_PRIMARY, /**< Primary DMA bytes */ + _DRM_STAT_SECONDARY, /**< Secondary DMA bytes */ + _DRM_STAT_DMA, /**< DMA */ + _DRM_STAT_SPECIAL, /**< Special DMA (e.g., priority or polled) */ + _DRM_STAT_MISSED /**< Missed DMA opportunity */ + /* Add to the *END* of the list */ +}; + +/* + * DRM_IOCTL_GET_STATS ioctl argument type. + */ +struct drm_stats { + unsigned long count; + struct { + unsigned long value; + enum drm_stat_type type; + } data[15]; +}; + +/* + * Hardware locking flags. + */ +enum drm_lock_flags { + _DRM_LOCK_READY = 0x01, /**< Wait until hardware is ready for DMA */ + _DRM_LOCK_QUIESCENT = 0x02, /**< Wait until hardware quiescent */ + _DRM_LOCK_FLUSH = 0x04, /**< Flush this context's DMA queue first */ + _DRM_LOCK_FLUSH_ALL = 0x08, /**< Flush all DMA queues first */ + /* These *HALT* flags aren't supported yet + -- they will be used to support the + full-screen DGA-like mode. */ + _DRM_HALT_ALL_QUEUES = 0x10, /**< Halt all current and future queues */ + _DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */ +}; + +/* + * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type. + * + * \sa drmGetLock() and drmUnlock(). 
+ */ +struct drm_lock { + int context; + enum drm_lock_flags flags; +}; + +/* + * DMA flags + * + * \warning + * These values \e must match xf86drm.h. + * + * \sa drm_dma. + */ +enum drm_dma_flags { + /* Flags for DMA buffer dispatch */ + _DRM_DMA_BLOCK = 0x01, /**< + * Block until buffer dispatched. + * + * \note The buffer may not yet have + * been processed by the hardware -- + * getting a hardware lock with the + * hardware quiescent will ensure + * that the buffer has been + * processed. + */ + _DRM_DMA_WHILE_LOCKED = 0x02, /**< Dispatch while lock held */ + _DRM_DMA_PRIORITY = 0x04, /**< High priority dispatch */ + + /* Flags for DMA buffer request */ + _DRM_DMA_WAIT = 0x10, /**< Wait for free buffers */ + _DRM_DMA_SMALLER_OK = 0x20, /**< Smaller-than-requested buffers OK */ + _DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */ +}; + +/* + * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type. + * + * \sa drmAddBufs(). + */ +struct drm_buf_desc { + int count; /**< Number of buffers of this size */ + int size; /**< Size in bytes */ + int low_mark; /**< Low water mark */ + int high_mark; /**< High water mark */ + enum { + _DRM_PAGE_ALIGN = 0x01, /**< Align on page boundaries for DMA */ + _DRM_AGP_BUFFER = 0x02, /**< Buffer is in AGP space */ + _DRM_SG_BUFFER = 0x04, /**< Scatter/gather memory buffer */ + _DRM_FB_BUFFER = 0x08, /**< Buffer is in frame buffer */ + _DRM_PCI_BUFFER_RO = 0x10 /**< Map PCI DMA buffer read-only */ + } flags; + unsigned long agp_start; /**< + * Start address of where the AGP buffers are + * in the AGP aperture + */ +}; + +/* + * DRM_IOCTL_INFO_BUFS ioctl argument type. + */ +struct drm_buf_info { + int count; /**< Entries in list */ + struct drm_buf_desc __user *list; +}; + +/* + * DRM_IOCTL_FREE_BUFS ioctl argument type. + */ +struct drm_buf_free { + int count; + int __user *list; +}; + +/* + * Buffer information + * + * \sa drm_buf_map. + */ +struct drm_buf_pub { + int idx; /**< Index into the master buffer list */ + int total; /**< Buffer size */ + int used; /**< Amount of buffer in use (for DMA) */ + void __user *address; /**< Address of buffer */ +}; + +/* + * DRM_IOCTL_MAP_BUFS ioctl argument type. + */ +struct drm_buf_map { + int count; /**< Length of the buffer list */ +#ifdef __cplusplus + void __user *virt; +#else + void __user *virtual; /**< Mmap'd area in user-virtual */ +#endif + struct drm_buf_pub __user *list; /**< Buffer information */ +}; + +/* + * DRM_IOCTL_DMA ioctl argument type. + * + * Indices here refer to the offset into the buffer list in drm_buf_get. + * + * \sa drmDMA(). + */ +struct drm_dma { + int context; /**< Context handle */ + int send_count; /**< Number of buffers to send */ + int __user *send_indices; /**< List of handles to buffers */ + int __user *send_sizes; /**< Lengths of data to send */ + enum drm_dma_flags flags; /**< Flags */ + int request_count; /**< Number of buffers requested */ + int request_size; /**< Desired size for buffers */ + int __user *request_indices; /**< Buffer information */ + int __user *request_sizes; + int granted_count; /**< Number of buffers granted */ +}; + +enum drm_ctx_flags { + _DRM_CONTEXT_PRESERVED = 0x01, + _DRM_CONTEXT_2DONLY = 0x02 +}; + +/* + * DRM_IOCTL_ADD_CTX ioctl argument type. + * + * \sa drmCreateContext() and drmDestroyContext(). + */ +struct drm_ctx { + drm_context_t handle; + enum drm_ctx_flags flags; +}; + +/* + * DRM_IOCTL_RES_CTX ioctl argument type. 
+ */ +struct drm_ctx_res { + int count; + struct drm_ctx __user *contexts; +}; + +/* + * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type. + */ +struct drm_draw { + drm_drawable_t handle; +}; + +/* + * DRM_IOCTL_UPDATE_DRAW ioctl argument type. + */ +typedef enum { + DRM_DRAWABLE_CLIPRECTS +} drm_drawable_info_type_t; + +struct drm_update_draw { + drm_drawable_t handle; + unsigned int type; + unsigned int num; + unsigned long long data; +}; + +/* + * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type. + */ +struct drm_auth { + drm_magic_t magic; +}; + +/* + * DRM_IOCTL_IRQ_BUSID ioctl argument type. + * + * \sa drmGetInterruptFromBusID(). + */ +struct drm_irq_busid { + int irq; /**< IRQ number */ + int busnum; /**< bus number */ + int devnum; /**< device number */ + int funcnum; /**< function number */ +}; + +enum drm_vblank_seq_type { + _DRM_VBLANK_ABSOLUTE = 0x0, /**< Wait for specific vblank sequence number */ + _DRM_VBLANK_RELATIVE = 0x1, /**< Wait for given number of vblanks */ + /* bits 1-6 are reserved for high crtcs */ + _DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e, + _DRM_VBLANK_EVENT = 0x4000000, /**< Send event instead of blocking */ + _DRM_VBLANK_FLIP = 0x8000000, /**< Scheduled buffer swap should flip */ + _DRM_VBLANK_NEXTONMISS = 0x10000000, /**< If missed, wait for next vblank */ + _DRM_VBLANK_SECONDARY = 0x20000000, /**< Secondary display controller */ + _DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, unsupported */ +}; +#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1 + +#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE) +#define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \ + _DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS) + +struct drm_wait_vblank_request { + enum drm_vblank_seq_type type; + unsigned int sequence; + unsigned long signal; +}; + +struct drm_wait_vblank_reply { + enum drm_vblank_seq_type type; + unsigned int sequence; + long tval_sec; + long tval_usec; +}; + +/* + * DRM_IOCTL_WAIT_VBLANK ioctl argument type. + * + * \sa drmWaitVBlank(). + */ +union drm_wait_vblank { + struct drm_wait_vblank_request request; + struct drm_wait_vblank_reply reply; +}; + +#define _DRM_PRE_MODESET 1 +#define _DRM_POST_MODESET 2 + +/* + * DRM_IOCTL_MODESET_CTL ioctl argument type + * + * \sa drmModesetCtl(). + */ +struct drm_modeset_ctl { + __u32 crtc; + __u32 cmd; +}; + +/* + * DRM_IOCTL_AGP_ENABLE ioctl argument type. + * + * \sa drmAgpEnable(). + */ +struct drm_agp_mode { + unsigned long mode; /**< AGP mode */ +}; + +/* + * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type. + * + * \sa drmAgpAlloc() and drmAgpFree(). + */ +struct drm_agp_buffer { + unsigned long size; /**< In bytes -- will round to page boundary */ + unsigned long handle; /**< Used for binding / unbinding */ + unsigned long type; /**< Type of memory to allocate */ + unsigned long physical; /**< Physical used by i810 */ +}; + +/* + * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type. + * + * \sa drmAgpBind() and drmAgpUnbind(). + */ +struct drm_agp_binding { + unsigned long handle; /**< From drm_agp_buffer */ + unsigned long offset; /**< In bytes -- will round to page boundary */ +}; + +/* + * DRM_IOCTL_AGP_INFO ioctl argument type. + * + * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(), + * drmAgpBase(), drmAgpSize(), drmAgpMemoryUsed(), drmAgpMemoryAvail(), + * drmAgpVendorId() and drmAgpDeviceId(). 
+ */ +struct drm_agp_info { + int agp_version_major; + int agp_version_minor; + unsigned long mode; + unsigned long aperture_base; /* physical address */ + unsigned long aperture_size; /* bytes */ + unsigned long memory_allowed; /* bytes */ + unsigned long memory_used; + + /* PCI information */ + unsigned short id_vendor; + unsigned short id_device; +}; + +/* + * DRM_IOCTL_SG_ALLOC ioctl argument type. + */ +struct drm_scatter_gather { + unsigned long size; /**< In bytes -- will round to page boundary */ + unsigned long handle; /**< Used for mapping / unmapping */ +}; + +/* + * DRM_IOCTL_SET_VERSION ioctl argument type. + */ +struct drm_set_version { + int drm_di_major; + int drm_di_minor; + int drm_dd_major; + int drm_dd_minor; +}; + +/** + * struct drm_gem_close - Argument for &DRM_IOCTL_GEM_CLOSE ioctl. + * @handle: Handle of the object to be closed. + * @pad: Padding. + * + * Releases the handle to an mm object. + */ +struct drm_gem_close { + __u32 handle; + __u32 pad; +}; + +/** + * struct drm_gem_flink - Argument for &DRM_IOCTL_GEM_FLINK ioctl. + * @handle: Handle for the object being named. + * @name: Returned global name. + * + * Create a global name for an object, returning the name. + * + * Note that the name does not hold a reference; when the object + * is freed, the name goes away. + */ +struct drm_gem_flink { + __u32 handle; + __u32 name; +}; + +/** + * struct drm_gem_open - Argument for &DRM_IOCTL_GEM_OPEN ioctl. + * @name: Name of object being opened. + * @handle: Returned handle for the object. + * @size: Returned size of the object + * + * Open an object using the global name, returning a handle and the size. + * + * This handle (of course) holds a reference to the object, so the object + * will not go away until the handle is deleted. + */ +struct drm_gem_open { + __u32 name; + __u32 handle; + __u64 size; +}; + +/** + * struct drm_gem_change_handle - Argument for &DRM_IOCTL_GEM_CHANGE_HANDLE ioctl. + * @handle: The handle of a gem object. + * @new_handle: An available gem handle. + * + * This ioctl changes the handle of a GEM object to the specified one. + * The new handle must be unused. On success the old handle is closed + * and all further IOCTL should refer to the new handle only. + * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle. + */ +struct drm_gem_change_handle { + __u32 handle; + __u32 new_handle; +}; + +/** + * DRM_CAP_DUMB_BUFFER + * + * If set to 1, the driver supports creating dumb buffers via the + * &DRM_IOCTL_MODE_CREATE_DUMB ioctl. + */ +#define DRM_CAP_DUMB_BUFFER 0x1 +/** + * DRM_CAP_VBLANK_HIGH_CRTC + * + * If set to 1, the kernel supports specifying a :ref:`CRTC index` + * in the high bits of &drm_wait_vblank_request.type. + * + * Starting kernel version 2.6.39, this capability is always set to 1. + */ +#define DRM_CAP_VBLANK_HIGH_CRTC 0x2 +/** + * DRM_CAP_DUMB_PREFERRED_DEPTH + * + * The preferred bit depth for dumb buffers. + * + * The bit depth is the number of bits used to indicate the color of a single + * pixel excluding any padding. This is different from the number of bits per + * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per + * pixel. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ +#define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3 +/** + * DRM_CAP_DUMB_PREFER_SHADOW + * + * If set to 1, the driver prefers userspace to render to a shadow buffer + * instead of directly rendering to a dumb buffer. 
For best speed, userspace + * should do streaming ordered memory copies into the dumb buffer and never + * read from it. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ +#define DRM_CAP_DUMB_PREFER_SHADOW 0x4 +/** + * DRM_CAP_PRIME + * + * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT + * and &DRM_PRIME_CAP_EXPORT. + * + * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and + * &DRM_PRIME_CAP_EXPORT are always advertised. + * + * PRIME buffers are exposed as dma-buf file descriptors. + * See :ref:`prime_buffer_sharing`. + */ +#define DRM_CAP_PRIME 0x5 +/** + * DRM_PRIME_CAP_IMPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME + * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. + */ +#define DRM_PRIME_CAP_IMPORT 0x1 +/** + * DRM_PRIME_CAP_EXPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME + * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. + */ +#define DRM_PRIME_CAP_EXPORT 0x2 +/** + * DRM_CAP_TIMESTAMP_MONOTONIC + * + * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in + * struct drm_event_vblank. If set to 1, the kernel will report timestamps with + * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these + * clocks. + * + * Starting from kernel version 2.6.39, the default value for this capability + * is 1. Starting kernel version 4.15, this capability is always set to 1. + */ +#define DRM_CAP_TIMESTAMP_MONOTONIC 0x6 +/** + * DRM_CAP_ASYNC_PAGE_FLIP + * + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy + * page-flips. + */ +#define DRM_CAP_ASYNC_PAGE_FLIP 0x7 +/** + * DRM_CAP_CURSOR_WIDTH + * + * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid + * width x height combination for the hardware cursor. The intention is that a + * hardware agnostic userspace can query a cursor plane size to use. + * + * Note that the cross-driver contract is to merely return a valid size; + * drivers are free to attach another meaning on top, eg. i915 returns the + * maximum plane size. + */ +#define DRM_CAP_CURSOR_WIDTH 0x8 +/** + * DRM_CAP_CURSOR_HEIGHT + * + * See &DRM_CAP_CURSOR_WIDTH. + */ +#define DRM_CAP_CURSOR_HEIGHT 0x9 +/** + * DRM_CAP_ADDFB2_MODIFIERS + * + * If set to 1, the driver supports supplying modifiers in the + * &DRM_IOCTL_MODE_ADDFB2 ioctl. + */ +#define DRM_CAP_ADDFB2_MODIFIERS 0x10 +/** + * DRM_CAP_PAGE_FLIP_TARGET + * + * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and + * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in + * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP + * ioctl. + */ +#define DRM_CAP_PAGE_FLIP_TARGET 0x11 +/** + * DRM_CAP_CRTC_IN_VBLANK_EVENT + * + * If set to 1, the kernel supports reporting the CRTC ID in + * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and + * &DRM_EVENT_FLIP_COMPLETE events. + * + * Starting kernel version 4.12, this capability is always set to 1. + */ +#define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 +/** + * DRM_CAP_SYNCOBJ + * + * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`. 
+ */ +#define DRM_CAP_SYNCOBJ 0x13 +/** + * DRM_CAP_SYNCOBJ_TIMELINE + * + * If set to 1, the driver supports timeline operations on sync objects. See + * :ref:`drm_sync_objects`. + */ +#define DRM_CAP_SYNCOBJ_TIMELINE 0x14 +/** + * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP + * + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic + * commits. + */ +#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15 + +/* DRM_IOCTL_GET_CAP ioctl argument type */ +struct drm_get_cap { + __u64 capability; + __u64 value; +}; + +/** + * DRM_CLIENT_CAP_STEREO_3D + * + * If set to 1, the DRM core will expose the stereo 3D capabilities of the + * monitor by advertising the supported 3D layouts in the flags of struct + * drm_mode_modeinfo. See ``DRM_MODE_FLAG_3D_*``. + * + * This capability is always supported for all drivers starting from kernel + * version 3.13. + */ +#define DRM_CLIENT_CAP_STEREO_3D 1 + +/** + * DRM_CLIENT_CAP_UNIVERSAL_PLANES + * + * If set to 1, the DRM core will expose all planes (overlay, primary, and + * cursor) to userspace. + * + * This capability has been introduced in kernel version 3.15. Starting from + * kernel version 3.17, this capability is always supported for all drivers. + */ +#define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 + +/** + * DRM_CLIENT_CAP_ATOMIC + * + * If set to 1, the DRM core will expose atomic properties to userspace. This + * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and + * &DRM_CLIENT_CAP_ASPECT_RATIO. + * + * If the driver doesn't support atomic mode-setting, enabling this capability + * will fail with -EOPNOTSUPP. + * + * This capability has been introduced in kernel version 4.0. Starting from + * kernel version 4.2, this capability is always supported for atomic-capable + * drivers. + */ +#define DRM_CLIENT_CAP_ATOMIC 3 + +/** + * DRM_CLIENT_CAP_ASPECT_RATIO + * + * If set to 1, the DRM core will provide aspect ratio information in modes. + * See ``DRM_MODE_FLAG_PIC_AR_*``. + * + * This capability is always supported for all drivers starting from kernel + * version 4.18. + */ +#define DRM_CLIENT_CAP_ASPECT_RATIO 4 + +/** + * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS + * + * If set to 1, the DRM core will expose special connectors to be used for + * writing back to memory the scene setup in the commit. The client must enable + * &DRM_CLIENT_CAP_ATOMIC first. + * + * This capability is always supported for atomic-capable drivers starting from + * kernel version 4.19. + */ +#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 + +/** + * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT + * + * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and + * virtualbox) have additional restrictions for cursor planes (thus + * making cursor planes on those drivers not truly universal,) e.g. + * they need cursor planes to act like one would expect from a mouse + * cursor and have correctly set hotspot properties. + * If this client cap is not set the DRM core will hide cursor plane on + * those virtualized drivers because not setting it implies that the + * client is not capable of dealing with those extra restictions. + * Clients which do set cursor hotspot and treat the cursor plane + * like a mouse cursor should set this property. + * The client must enable &DRM_CLIENT_CAP_ATOMIC first. + * + * Setting this property on drivers which do not special case + * cursor planes (i.e. non-virtualized drivers) will return + * EOPNOTSUPP, which can be used by userspace to gauge + * requirements of the hardware/drivers they're running on. 
+ * + * This capability is always supported for atomic-capable virtualized + * drivers starting from kernel version 6.6. + */ +#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 6 + +/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ +struct drm_set_client_cap { + __u64 capability; + __u64 value; +}; + +#define DRM_RDWR O_RDWR +#define DRM_CLOEXEC O_CLOEXEC +struct drm_prime_handle { + __u32 handle; + + /** Flags.. only applicable for handle->fd */ + __u32 flags; + + /** Returned dmabuf file descriptor */ + __s32 fd; +}; + +struct drm_syncobj_create { + __u32 handle; +#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) + __u32 flags; +}; + +struct drm_syncobj_destroy { + __u32 handle; + __u32 pad; +}; + +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE (1 << 1) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE (1 << 1) +struct drm_syncobj_handle { + __u32 handle; + __u32 flags; + + __s32 fd; + __u32 pad; + + __u64 point; +}; + +struct drm_syncobj_transfer { + __u32 src_handle; + __u32 dst_handle; + __u64 src_point; + __u64 dst_point; + __u32 flags; + __u32 pad; +}; + +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */ +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */ +struct drm_syncobj_wait { + __u64 handles; + /* absolute timeout */ + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; + /** + * @deadline_nsec - fence deadline hint + * + * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing + * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is + * set. + */ + __u64 deadline_nsec; +}; + +struct drm_syncobj_timeline_wait { + __u64 handles; + /* wait on specific timeline point for every handles*/ + __u64 points; + /* absolute timeout */ + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; + /** + * @deadline_nsec - fence deadline hint + * + * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing + * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is + * set. + */ + __u64 deadline_nsec; +}; + +/** + * struct drm_syncobj_eventfd + * @handle: syncobj handle. + * @flags: Zero to wait for the point to be signalled, or + * &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be + * available for the point. + * @point: syncobj timeline point (set to zero for binary syncobjs). + * @fd: Existing eventfd to sent events to. + * @pad: Must be zero. + * + * Register an eventfd to be signalled by a syncobj. The eventfd counter will + * be incremented by one. 
+ */ +struct drm_syncobj_eventfd { + __u32 handle; + __u32 flags; + __u64 point; + __s32 fd; + __u32 pad; +}; + + +struct drm_syncobj_array { + __u64 handles; + __u32 count_handles; + __u32 pad; +}; + +#define DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED (1 << 0) /* last available point on timeline syncobj */ +struct drm_syncobj_timeline_array { + __u64 handles; + __u64 points; + __u32 count_handles; + __u32 flags; +}; + + +/* Query current scanout sequence number */ +struct drm_crtc_get_sequence { + __u32 crtc_id; /* requested crtc_id */ + __u32 active; /* return: crtc output is active */ + __u64 sequence; /* return: most recent vblank sequence */ + __s64 sequence_ns; /* return: most recent time of first pixel out */ +}; + +/* Queue event to be delivered at specified sequence. Time stamp marks + * when the first pixel of the refresh cycle leaves the display engine + * for the display + */ +#define DRM_CRTC_SEQUENCE_RELATIVE 0x00000001 /* sequence is relative to current */ +#define DRM_CRTC_SEQUENCE_NEXT_ON_MISS 0x00000002 /* Use next sequence if we've missed */ + +struct drm_crtc_queue_sequence { + __u32 crtc_id; + __u32 flags; + __u64 sequence; /* on input, target sequence. on output, actual sequence */ + __u64 user_data; /* user data passed to event */ +}; + +#define DRM_CLIENT_NAME_MAX_LEN 64 +struct drm_set_client_name { + __u64 name_len; + __u64 name; +}; + + +#if defined(__cplusplus) +} +#endif + +#include "drm_mode.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_IOCTL_BASE 'd' +#define DRM_IO(nr) _IO(DRM_IOCTL_BASE,nr) +#define DRM_IOR(nr,type) _IOR(DRM_IOCTL_BASE,nr,type) +#define DRM_IOW(nr,type) _IOW(DRM_IOCTL_BASE,nr,type) +#define DRM_IOWR(nr,type) _IOWR(DRM_IOCTL_BASE,nr,type) + +#define DRM_IOCTL_VERSION DRM_IOWR(0x00, struct drm_version) +#define DRM_IOCTL_GET_UNIQUE DRM_IOWR(0x01, struct drm_unique) +#define DRM_IOCTL_GET_MAGIC DRM_IOR( 0x02, struct drm_auth) +#define DRM_IOCTL_IRQ_BUSID DRM_IOWR(0x03, struct drm_irq_busid) +#define DRM_IOCTL_GET_MAP DRM_IOWR(0x04, struct drm_map) +#define DRM_IOCTL_GET_CLIENT DRM_IOWR(0x05, struct drm_client) +#define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats) +#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version) +#define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl) +/** + * DRM_IOCTL_GEM_CLOSE - Close a GEM handle. + * + * GEM handles are not reference-counted by the kernel. User-space is + * responsible for managing their lifetime. For example, if user-space imports + * the same memory object twice on the same DRM file description, the same GEM + * handle is returned by both imports, and user-space needs to ensure + * &DRM_IOCTL_GEM_CLOSE is performed once only. The same situation can happen + * when a memory object is allocated, then exported and imported again on the + * same DRM file description. The &DRM_IOCTL_MODE_GETFB2 IOCTL is an exception + * and always returns fresh new GEM handles even if an existing GEM handle + * already refers to the same memory object before the IOCTL is performed. 
+ */ +#define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close) +#define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink) +#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open) +#define DRM_IOCTL_GET_CAP DRM_IOWR(0x0c, struct drm_get_cap) +#define DRM_IOCTL_SET_CLIENT_CAP DRM_IOW( 0x0d, struct drm_set_client_cap) + +#define DRM_IOCTL_SET_UNIQUE DRM_IOW( 0x10, struct drm_unique) +#define DRM_IOCTL_AUTH_MAGIC DRM_IOW( 0x11, struct drm_auth) +#define DRM_IOCTL_BLOCK DRM_IOWR(0x12, struct drm_block) +#define DRM_IOCTL_UNBLOCK DRM_IOWR(0x13, struct drm_block) +#define DRM_IOCTL_CONTROL DRM_IOW( 0x14, struct drm_control) +#define DRM_IOCTL_ADD_MAP DRM_IOWR(0x15, struct drm_map) +#define DRM_IOCTL_ADD_BUFS DRM_IOWR(0x16, struct drm_buf_desc) +#define DRM_IOCTL_MARK_BUFS DRM_IOW( 0x17, struct drm_buf_desc) +#define DRM_IOCTL_INFO_BUFS DRM_IOWR(0x18, struct drm_buf_info) +#define DRM_IOCTL_MAP_BUFS DRM_IOWR(0x19, struct drm_buf_map) +#define DRM_IOCTL_FREE_BUFS DRM_IOW( 0x1a, struct drm_buf_free) + +#define DRM_IOCTL_RM_MAP DRM_IOW( 0x1b, struct drm_map) + +#define DRM_IOCTL_SET_SAREA_CTX DRM_IOW( 0x1c, struct drm_ctx_priv_map) +#define DRM_IOCTL_GET_SAREA_CTX DRM_IOWR(0x1d, struct drm_ctx_priv_map) + +#define DRM_IOCTL_SET_MASTER DRM_IO(0x1e) +#define DRM_IOCTL_DROP_MASTER DRM_IO(0x1f) + +#define DRM_IOCTL_ADD_CTX DRM_IOWR(0x20, struct drm_ctx) +#define DRM_IOCTL_RM_CTX DRM_IOWR(0x21, struct drm_ctx) +#define DRM_IOCTL_MOD_CTX DRM_IOW( 0x22, struct drm_ctx) +#define DRM_IOCTL_GET_CTX DRM_IOWR(0x23, struct drm_ctx) +#define DRM_IOCTL_SWITCH_CTX DRM_IOW( 0x24, struct drm_ctx) +#define DRM_IOCTL_NEW_CTX DRM_IOW( 0x25, struct drm_ctx) +#define DRM_IOCTL_RES_CTX DRM_IOWR(0x26, struct drm_ctx_res) +#define DRM_IOCTL_ADD_DRAW DRM_IOWR(0x27, struct drm_draw) +#define DRM_IOCTL_RM_DRAW DRM_IOWR(0x28, struct drm_draw) +#define DRM_IOCTL_DMA DRM_IOWR(0x29, struct drm_dma) +#define DRM_IOCTL_LOCK DRM_IOW( 0x2a, struct drm_lock) +#define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock) +#define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock) + +/** + * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD. + * + * User-space sets &drm_prime_handle.handle with the GEM handle to export and + * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in + * &drm_prime_handle.fd. + * + * The export can fail for any driver-specific reason, e.g. because export is + * not supported for this specific GEM handle (but might be for others). + * + * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT. + */ +#define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle) +/** + * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle. + * + * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to + * import, and gets back a GEM handle in &drm_prime_handle.handle. + * &drm_prime_handle.flags is unused. + * + * If an existing GEM handle refers to the memory object backing the DMA-BUF, + * that GEM handle is returned. Therefore user-space which needs to handle + * arbitrary DMA-BUFs must have a user-space lookup data structure to manually + * reference-count duplicated GEM handles. For more information see + * &DRM_IOCTL_GEM_CLOSE. + * + * The import can fail for any driver-specific reason, e.g. because import is + * only supported for DMA-BUFs allocated on this DRM device. + * + * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT. 
+ */ +#define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle) + +#define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30) +#define DRM_IOCTL_AGP_RELEASE DRM_IO( 0x31) +#define DRM_IOCTL_AGP_ENABLE DRM_IOW( 0x32, struct drm_agp_mode) +#define DRM_IOCTL_AGP_INFO DRM_IOR( 0x33, struct drm_agp_info) +#define DRM_IOCTL_AGP_ALLOC DRM_IOWR(0x34, struct drm_agp_buffer) +#define DRM_IOCTL_AGP_FREE DRM_IOW( 0x35, struct drm_agp_buffer) +#define DRM_IOCTL_AGP_BIND DRM_IOW( 0x36, struct drm_agp_binding) +#define DRM_IOCTL_AGP_UNBIND DRM_IOW( 0x37, struct drm_agp_binding) + +#define DRM_IOCTL_SG_ALLOC DRM_IOWR(0x38, struct drm_scatter_gather) +#define DRM_IOCTL_SG_FREE DRM_IOW( 0x39, struct drm_scatter_gather) + +#define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank) + +#define DRM_IOCTL_CRTC_GET_SEQUENCE DRM_IOWR(0x3b, struct drm_crtc_get_sequence) +#define DRM_IOCTL_CRTC_QUEUE_SEQUENCE DRM_IOWR(0x3c, struct drm_crtc_queue_sequence) + +#define DRM_IOCTL_UPDATE_DRAW DRM_IOW(0x3f, struct drm_update_draw) + +#define DRM_IOCTL_MODE_GETRESOURCES DRM_IOWR(0xA0, struct drm_mode_card_res) +#define DRM_IOCTL_MODE_GETCRTC DRM_IOWR(0xA1, struct drm_mode_crtc) +#define DRM_IOCTL_MODE_SETCRTC DRM_IOWR(0xA2, struct drm_mode_crtc) +#define DRM_IOCTL_MODE_CURSOR DRM_IOWR(0xA3, struct drm_mode_cursor) +#define DRM_IOCTL_MODE_GETGAMMA DRM_IOWR(0xA4, struct drm_mode_crtc_lut) +#define DRM_IOCTL_MODE_SETGAMMA DRM_IOWR(0xA5, struct drm_mode_crtc_lut) +#define DRM_IOCTL_MODE_GETENCODER DRM_IOWR(0xA6, struct drm_mode_get_encoder) +#define DRM_IOCTL_MODE_GETCONNECTOR DRM_IOWR(0xA7, struct drm_mode_get_connector) +#define DRM_IOCTL_MODE_ATTACHMODE DRM_IOWR(0xA8, struct drm_mode_mode_cmd) /* deprecated (never worked) */ +#define DRM_IOCTL_MODE_DETACHMODE DRM_IOWR(0xA9, struct drm_mode_mode_cmd) /* deprecated (never worked) */ + +#define DRM_IOCTL_MODE_GETPROPERTY DRM_IOWR(0xAA, struct drm_mode_get_property) +#define DRM_IOCTL_MODE_SETPROPERTY DRM_IOWR(0xAB, struct drm_mode_connector_set_property) +#define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob) +#define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd) +#define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd) +/** + * DRM_IOCTL_MODE_RMFB - Remove a framebuffer. + * + * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. + * + * Warning: removing a framebuffer currently in-use on an enabled plane will + * disable that plane. The CRTC the plane is linked to may also be disabled + * (depending on driver capabilities). + */ +#define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int) +#define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) +#define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) + +/** + * DRM_IOCTL_MODE_CREATE_DUMB - Create a new dumb buffer object. + * + * KMS dumb buffers provide a very primitive way to allocate a buffer object + * suitable for scanout and map it for software rendering. KMS dumb buffers are + * not suitable for hardware-accelerated rendering nor video decoding. KMS dumb + * buffers are not suitable to be displayed on any other device than the KMS + * device where they were allocated from. Also see + * :ref:`kms_dumb_buffer_objects`. + * + * The IOCTL argument is a struct drm_mode_create_dumb. + * + * User-space is expected to create a KMS dumb buffer via this IOCTL, then add + * it as a KMS framebuffer via &DRM_IOCTL_MODE_ADDFB and map it via + * &DRM_IOCTL_MODE_MAP_DUMB. 
+ * + * &DRM_CAP_DUMB_BUFFER indicates whether this IOCTL is supported. + * &DRM_CAP_DUMB_PREFERRED_DEPTH and &DRM_CAP_DUMB_PREFER_SHADOW indicate + * driver preferences for dumb buffers. + */ +#define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb) +#define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb) +#define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb) +#define DRM_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xB5, struct drm_mode_get_plane_res) +#define DRM_IOCTL_MODE_GETPLANE DRM_IOWR(0xB6, struct drm_mode_get_plane) +#define DRM_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct drm_mode_set_plane) +#define DRM_IOCTL_MODE_ADDFB2 DRM_IOWR(0xB8, struct drm_mode_fb_cmd2) +#define DRM_IOCTL_MODE_OBJ_GETPROPERTIES DRM_IOWR(0xB9, struct drm_mode_obj_get_properties) +#define DRM_IOCTL_MODE_OBJ_SETPROPERTY DRM_IOWR(0xBA, struct drm_mode_obj_set_property) +#define DRM_IOCTL_MODE_CURSOR2 DRM_IOWR(0xBB, struct drm_mode_cursor2) +#define DRM_IOCTL_MODE_ATOMIC DRM_IOWR(0xBC, struct drm_mode_atomic) +#define DRM_IOCTL_MODE_CREATEPROPBLOB DRM_IOWR(0xBD, struct drm_mode_create_blob) +#define DRM_IOCTL_MODE_DESTROYPROPBLOB DRM_IOWR(0xBE, struct drm_mode_destroy_blob) + +#define DRM_IOCTL_SYNCOBJ_CREATE DRM_IOWR(0xBF, struct drm_syncobj_create) +#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) +#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) +#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) +#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array) + +#define DRM_IOCTL_MODE_CREATE_LEASE DRM_IOWR(0xC6, struct drm_mode_create_lease) +#define DRM_IOCTL_MODE_LIST_LESSEES DRM_IOWR(0xC7, struct drm_mode_list_lessees) +#define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct drm_mode_get_lease) +#define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct drm_mode_revoke_lease) + +#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait) +#define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct drm_syncobj_timeline_array) +#define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) + +/** + * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata. + * + * This queries metadata about a framebuffer. User-space fills + * &drm_mode_fb_cmd2.fb_id as the input, and the kernels fills the rest of the + * struct as the output. + * + * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles + * will be filled with GEM buffer handles. Fresh new GEM handles are always + * returned, even if another GEM handle referring to the same memory object + * already exists on the DRM file description. The caller is responsible for + * removing the new handles, e.g. via the &DRM_IOCTL_GEM_CLOSE IOCTL. The same + * new handle will be returned for multiple planes in case they use the same + * memory object. Planes are valid until one has a zero handle -- this can be + * used to compute the number of planes. + * + * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid + * until one has a zero &drm_mode_fb_cmd2.pitches. 
+ * + * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set + * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the + * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. + * + * To obtain DMA-BUF FDs for each plane without leaking GEM handles, user-space + * can export each handle via &DRM_IOCTL_PRIME_HANDLE_TO_FD, then immediately + * close each unique handle via &DRM_IOCTL_GEM_CLOSE, making sure to not + * double-close handles which are specified multiple times in the array. + */ +#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) + +#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) + +/** + * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer. + * + * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. + * + * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable + * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept + * alive. When the plane no longer uses the framebuffer (because the + * framebuffer is replaced with another one, or the plane is disabled), the + * framebuffer is cleaned up. + * + * This is useful to implement flicker-free transitions between two processes. + * + * Depending on the threat model, user-space may want to ensure that the + * framebuffer doesn't expose any sensitive user information: closed + * framebuffers attached to a plane can be read back by the next DRM master. + */ +#define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) + +/** + * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file + * + * Having a name allows for easier tracking and debugging. + * The length of the name (without null ending char) must be + * <= DRM_CLIENT_NAME_MAX_LEN. + * The call will fail if the name contains whitespaces or non-printable chars. + */ +#define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name) + +/** + * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle + * + * Some applications (notably CRIU) need objects to have specific gem handles. + * This ioctl changes the object at one gem handle to use a new gem handle. + */ +#define DRM_IOCTL_GEM_CHANGE_HANDLE DRM_IOWR(0xD2, struct drm_gem_change_handle) + +/* + * Device specific ioctls should only be in their respective headers + * The device specific ioctl range is from 0x40 to 0x9f. + * Generic IOCTLS restart at 0xA0. + * + * \sa drmCommandNone(), drmCommandRead(), drmCommandWrite(), and + * drmCommandReadWrite(). + */ +#define DRM_COMMAND_BASE 0x40 +#define DRM_COMMAND_END 0xA0 + +/** + * struct drm_event - Header for DRM events + * @type: event type. + * @length: total number of payload bytes (including header). + * + * This struct is a header for events written back to user-space on the DRM FD. + * A read on the DRM FD will always only return complete events: e.g. if the + * read buffer is 100 bytes large and there are two 64 byte events pending, + * only one will be returned. + * + * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and + * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK, + * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE. + */ +struct drm_event { + __u32 type; + __u32 length; +}; + +/** + * DRM_EVENT_VBLANK - vertical blanking event + * + * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the + * &_DRM_VBLANK_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. 
+ */ +#define DRM_EVENT_VBLANK 0x01 +/** + * DRM_EVENT_FLIP_COMPLETE - page-flip completion event + * + * This event is sent in response to an atomic commit or legacy page-flip with + * the &DRM_MODE_PAGE_FLIP_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. + */ +#define DRM_EVENT_FLIP_COMPLETE 0x02 +/** + * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event + * + * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE. + * + * The event payload is a struct drm_event_crtc_sequence. + */ +#define DRM_EVENT_CRTC_SEQUENCE 0x03 + +struct drm_event_vblank { + struct drm_event base; + __u64 user_data; + __u32 tv_sec; + __u32 tv_usec; + __u32 sequence; + __u32 crtc_id; /* 0 on older kernels that do not support this */ +}; + +/* Event delivered at sequence. Time stamp marks when the first pixel + * of the refresh cycle leaves the display engine for the display + */ +struct drm_event_crtc_sequence { + struct drm_event base; + __u64 user_data; + __s64 time_ns; + __u64 sequence; +}; + +/* typedef area */ +#ifndef __KERNEL__ +typedef struct drm_clip_rect drm_clip_rect_t; +typedef struct drm_drawable_info drm_drawable_info_t; +typedef struct drm_tex_region drm_tex_region_t; +typedef struct drm_hw_lock drm_hw_lock_t; +typedef struct drm_version drm_version_t; +typedef struct drm_unique drm_unique_t; +typedef struct drm_list drm_list_t; +typedef struct drm_block drm_block_t; +typedef struct drm_control drm_control_t; +typedef enum drm_map_type drm_map_type_t; +typedef enum drm_map_flags drm_map_flags_t; +typedef struct drm_ctx_priv_map drm_ctx_priv_map_t; +typedef struct drm_map drm_map_t; +typedef struct drm_client drm_client_t; +typedef enum drm_stat_type drm_stat_type_t; +typedef struct drm_stats drm_stats_t; +typedef enum drm_lock_flags drm_lock_flags_t; +typedef struct drm_lock drm_lock_t; +typedef enum drm_dma_flags drm_dma_flags_t; +typedef struct drm_buf_desc drm_buf_desc_t; +typedef struct drm_buf_info drm_buf_info_t; +typedef struct drm_buf_free drm_buf_free_t; +typedef struct drm_buf_pub drm_buf_pub_t; +typedef struct drm_buf_map drm_buf_map_t; +typedef struct drm_dma drm_dma_t; +typedef union drm_wait_vblank drm_wait_vblank_t; +typedef struct drm_agp_mode drm_agp_mode_t; +typedef enum drm_ctx_flags drm_ctx_flags_t; +typedef struct drm_ctx drm_ctx_t; +typedef struct drm_ctx_res drm_ctx_res_t; +typedef struct drm_draw drm_draw_t; +typedef struct drm_update_draw drm_update_draw_t; +typedef struct drm_auth drm_auth_t; +typedef struct drm_irq_busid drm_irq_busid_t; +typedef enum drm_vblank_seq_type drm_vblank_seq_type_t; + +typedef struct drm_agp_buffer drm_agp_buffer_t; +typedef struct drm_agp_binding drm_agp_binding_t; +typedef struct drm_agp_info drm_agp_info_t; +typedef struct drm_scatter_gather drm_scatter_gather_t; +typedef struct drm_set_version drm_set_version_t; +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/plugins/amdgpu/drm_mode.h b/plugins/amdgpu/drm_mode.h new file mode 100644 index 000000000..c082810c0 --- /dev/null +++ b/plugins/amdgpu/drm_mode.h @@ -0,0 +1,1362 @@ +/* + * Copyright (c) 2007 Dave Airlie + * Copyright (c) 2007 Jakob Bornecrantz + * Copyright (c) 2008 Red Hat Inc. 
+ * Copyright (c) 2007-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA + * Copyright (c) 2007-2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _DRM_MODE_H +#define _DRM_MODE_H + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * DOC: overview + * + * DRM exposes many UAPI and structure definitions to have a consistent + * and standardized interface with users. + * Userspace can refer to these structure definitions and UAPI formats + * to communicate to drivers. + */ + +#define DRM_CONNECTOR_NAME_LEN 32 +#define DRM_DISPLAY_MODE_LEN 32 +#define DRM_PROP_NAME_LEN 32 + +#define DRM_MODE_TYPE_BUILTIN (1<<0) /* deprecated */ +#define DRM_MODE_TYPE_CLOCK_C ((1<<1) | DRM_MODE_TYPE_BUILTIN) /* deprecated */ +#define DRM_MODE_TYPE_CRTC_C ((1<<2) | DRM_MODE_TYPE_BUILTIN) /* deprecated */ +#define DRM_MODE_TYPE_PREFERRED (1<<3) +#define DRM_MODE_TYPE_DEFAULT (1<<4) /* deprecated */ +#define DRM_MODE_TYPE_USERDEF (1<<5) +#define DRM_MODE_TYPE_DRIVER (1<<6) + +#define DRM_MODE_TYPE_ALL (DRM_MODE_TYPE_PREFERRED | \ + DRM_MODE_TYPE_USERDEF | \ + DRM_MODE_TYPE_DRIVER) + +/* Video mode flags */ +/* bit compatible with the xrandr RR_ definitions (bits 0-13) + * + * ABI warning: Existing userspace really expects + * the mode flags to match the xrandr definitions. Any + * changes that don't match the xrandr definitions will + * likely need a new client cap or some other mechanism + * to avoid breaking existing userspace. This includes + * allocating new flags in the previously unused bits! + */ +#define DRM_MODE_FLAG_PHSYNC (1<<0) +#define DRM_MODE_FLAG_NHSYNC (1<<1) +#define DRM_MODE_FLAG_PVSYNC (1<<2) +#define DRM_MODE_FLAG_NVSYNC (1<<3) +#define DRM_MODE_FLAG_INTERLACE (1<<4) +#define DRM_MODE_FLAG_DBLSCAN (1<<5) +#define DRM_MODE_FLAG_CSYNC (1<<6) +#define DRM_MODE_FLAG_PCSYNC (1<<7) +#define DRM_MODE_FLAG_NCSYNC (1<<8) +#define DRM_MODE_FLAG_HSKEW (1<<9) /* hskew provided */ +#define DRM_MODE_FLAG_BCAST (1<<10) /* deprecated */ +#define DRM_MODE_FLAG_PIXMUX (1<<11) /* deprecated */ +#define DRM_MODE_FLAG_DBLCLK (1<<12) +#define DRM_MODE_FLAG_CLKDIV2 (1<<13) + /* + * When adding a new stereo mode don't forget to adjust DRM_MODE_FLAGS_3D_MAX + * (define not exposed to user space). 
+ */ +#define DRM_MODE_FLAG_3D_MASK (0x1f<<14) +#define DRM_MODE_FLAG_3D_NONE (0<<14) +#define DRM_MODE_FLAG_3D_FRAME_PACKING (1<<14) +#define DRM_MODE_FLAG_3D_FIELD_ALTERNATIVE (2<<14) +#define DRM_MODE_FLAG_3D_LINE_ALTERNATIVE (3<<14) +#define DRM_MODE_FLAG_3D_SIDE_BY_SIDE_FULL (4<<14) +#define DRM_MODE_FLAG_3D_L_DEPTH (5<<14) +#define DRM_MODE_FLAG_3D_L_DEPTH_GFX_GFX_DEPTH (6<<14) +#define DRM_MODE_FLAG_3D_TOP_AND_BOTTOM (7<<14) +#define DRM_MODE_FLAG_3D_SIDE_BY_SIDE_HALF (8<<14) + +/* Picture aspect ratio options */ +#define DRM_MODE_PICTURE_ASPECT_NONE 0 +#define DRM_MODE_PICTURE_ASPECT_4_3 1 +#define DRM_MODE_PICTURE_ASPECT_16_9 2 +#define DRM_MODE_PICTURE_ASPECT_64_27 3 +#define DRM_MODE_PICTURE_ASPECT_256_135 4 + +/* Content type options */ +#define DRM_MODE_CONTENT_TYPE_NO_DATA 0 +#define DRM_MODE_CONTENT_TYPE_GRAPHICS 1 +#define DRM_MODE_CONTENT_TYPE_PHOTO 2 +#define DRM_MODE_CONTENT_TYPE_CINEMA 3 +#define DRM_MODE_CONTENT_TYPE_GAME 4 + +/* Aspect ratio flag bitmask (4 bits 22:19) */ +#define DRM_MODE_FLAG_PIC_AR_MASK (0x0F<<19) +#define DRM_MODE_FLAG_PIC_AR_NONE \ + (DRM_MODE_PICTURE_ASPECT_NONE<<19) +#define DRM_MODE_FLAG_PIC_AR_4_3 \ + (DRM_MODE_PICTURE_ASPECT_4_3<<19) +#define DRM_MODE_FLAG_PIC_AR_16_9 \ + (DRM_MODE_PICTURE_ASPECT_16_9<<19) +#define DRM_MODE_FLAG_PIC_AR_64_27 \ + (DRM_MODE_PICTURE_ASPECT_64_27<<19) +#define DRM_MODE_FLAG_PIC_AR_256_135 \ + (DRM_MODE_PICTURE_ASPECT_256_135<<19) + +#define DRM_MODE_FLAG_ALL (DRM_MODE_FLAG_PHSYNC | \ + DRM_MODE_FLAG_NHSYNC | \ + DRM_MODE_FLAG_PVSYNC | \ + DRM_MODE_FLAG_NVSYNC | \ + DRM_MODE_FLAG_INTERLACE | \ + DRM_MODE_FLAG_DBLSCAN | \ + DRM_MODE_FLAG_CSYNC | \ + DRM_MODE_FLAG_PCSYNC | \ + DRM_MODE_FLAG_NCSYNC | \ + DRM_MODE_FLAG_HSKEW | \ + DRM_MODE_FLAG_DBLCLK | \ + DRM_MODE_FLAG_CLKDIV2 | \ + DRM_MODE_FLAG_3D_MASK) + +/* DPMS flags */ +/* bit compatible with the xorg definitions. */ +#define DRM_MODE_DPMS_ON 0 +#define DRM_MODE_DPMS_STANDBY 1 +#define DRM_MODE_DPMS_SUSPEND 2 +#define DRM_MODE_DPMS_OFF 3 + +/* Scaling mode options */ +#define DRM_MODE_SCALE_NONE 0 /* Unmodified timing (display or + software can still scale) */ +#define DRM_MODE_SCALE_FULLSCREEN 1 /* Full screen, ignore aspect */ +#define DRM_MODE_SCALE_CENTER 2 /* Centered, no scaling */ +#define DRM_MODE_SCALE_ASPECT 3 /* Full screen, preserve aspect */ + +/* Dithering mode options */ +#define DRM_MODE_DITHERING_OFF 0 +#define DRM_MODE_DITHERING_ON 1 +#define DRM_MODE_DITHERING_AUTO 2 + +/* Dirty info options */ +#define DRM_MODE_DIRTY_OFF 0 +#define DRM_MODE_DIRTY_ON 1 +#define DRM_MODE_DIRTY_ANNOTATE 2 + +/* Link Status options */ +#define DRM_MODE_LINK_STATUS_GOOD 0 +#define DRM_MODE_LINK_STATUS_BAD 1 + +/* + * DRM_MODE_ROTATE_ + * + * Signals that a drm plane is been rotated degrees in counter + * clockwise direction. + * + * This define is provided as a convenience, looking up the property id + * using the name->prop id lookup is the preferred method. + */ +#define DRM_MODE_ROTATE_0 (1<<0) +#define DRM_MODE_ROTATE_90 (1<<1) +#define DRM_MODE_ROTATE_180 (1<<2) +#define DRM_MODE_ROTATE_270 (1<<3) + +/* + * DRM_MODE_ROTATE_MASK + * + * Bitmask used to look for drm plane rotations. + */ +#define DRM_MODE_ROTATE_MASK (\ + DRM_MODE_ROTATE_0 | \ + DRM_MODE_ROTATE_90 | \ + DRM_MODE_ROTATE_180 | \ + DRM_MODE_ROTATE_270) + +/* + * DRM_MODE_REFLECT_ + * + * Signals that the contents of a drm plane is reflected along the axis, + * in the same way as mirroring. + * See kerneldoc chapter "Plane Composition Properties" for more details. 
+ * + * This define is provided as a convenience, looking up the property id + * using the name->prop id lookup is the preferred method. + */ +#define DRM_MODE_REFLECT_X (1<<4) +#define DRM_MODE_REFLECT_Y (1<<5) + +/* + * DRM_MODE_REFLECT_MASK + * + * Bitmask used to look for drm plane reflections. + */ +#define DRM_MODE_REFLECT_MASK (\ + DRM_MODE_REFLECT_X | \ + DRM_MODE_REFLECT_Y) + +/* Content Protection Flags */ +#define DRM_MODE_CONTENT_PROTECTION_UNDESIRED 0 +#define DRM_MODE_CONTENT_PROTECTION_DESIRED 1 +#define DRM_MODE_CONTENT_PROTECTION_ENABLED 2 + +/** + * struct drm_mode_modeinfo - Display mode information. + * @clock: pixel clock in kHz + * @hdisplay: horizontal display size + * @hsync_start: horizontal sync start + * @hsync_end: horizontal sync end + * @htotal: horizontal total size + * @hskew: horizontal skew + * @vdisplay: vertical display size + * @vsync_start: vertical sync start + * @vsync_end: vertical sync end + * @vtotal: vertical total size + * @vscan: vertical scan + * @vrefresh: approximate vertical refresh rate in Hz + * @flags: bitmask of misc. flags, see DRM_MODE_FLAG_* defines + * @type: bitmask of type flags, see DRM_MODE_TYPE_* defines + * @name: string describing the mode resolution + * + * This is the user-space API display mode information structure. For the + * kernel version see struct drm_display_mode. + */ +struct drm_mode_modeinfo { + __u32 clock; + __u16 hdisplay; + __u16 hsync_start; + __u16 hsync_end; + __u16 htotal; + __u16 hskew; + __u16 vdisplay; + __u16 vsync_start; + __u16 vsync_end; + __u16 vtotal; + __u16 vscan; + + __u32 vrefresh; + + __u32 flags; + __u32 type; + char name[DRM_DISPLAY_MODE_LEN]; +}; + +struct drm_mode_card_res { + __u64 fb_id_ptr; + __u64 crtc_id_ptr; + __u64 connector_id_ptr; + __u64 encoder_id_ptr; + __u32 count_fbs; + __u32 count_crtcs; + __u32 count_connectors; + __u32 count_encoders; + __u32 min_width; + __u32 max_width; + __u32 min_height; + __u32 max_height; +}; + +struct drm_mode_crtc { + __u64 set_connectors_ptr; + __u32 count_connectors; + + __u32 crtc_id; /**< Id */ + __u32 fb_id; /**< Id of framebuffer */ + + __u32 x; /**< x Position on the framebuffer */ + __u32 y; /**< y Position on the framebuffer */ + + __u32 gamma_size; + __u32 mode_valid; + struct drm_mode_modeinfo mode; +}; + +#define DRM_MODE_PRESENT_TOP_FIELD (1<<0) +#define DRM_MODE_PRESENT_BOTTOM_FIELD (1<<1) + +/* Planes blend with or override other bits on the CRTC */ +struct drm_mode_set_plane { + __u32 plane_id; + __u32 crtc_id; + __u32 fb_id; /* fb object contains surface format type */ + __u32 flags; /* see above flags */ + + /* Signed dest location allows it to be partially off screen */ + __s32 crtc_x; + __s32 crtc_y; + __u32 crtc_w; + __u32 crtc_h; + + /* Source values are 16.16 fixed point */ + __u32 src_x; + __u32 src_y; + __u32 src_h; + __u32 src_w; +}; + +/** + * struct drm_mode_get_plane - Get plane metadata. + * + * Userspace can perform a GETPLANE ioctl to retrieve information about a + * plane. + * + * To retrieve the number of formats supported, set @count_format_types to zero + * and call the ioctl. @count_format_types will be updated with the value. + * + * To retrieve these formats, allocate an array with the memory needed to store + * @count_format_types formats. Point @format_type_ptr to this array and call + * the ioctl again (with @count_format_types still set to the value returned in + * the first ioctl call). 
+ */ +struct drm_mode_get_plane { + /** + * @plane_id: Object ID of the plane whose information should be + * retrieved. Set by caller. + */ + __u32 plane_id; + + /** @crtc_id: Object ID of the current CRTC. */ + __u32 crtc_id; + /** @fb_id: Object ID of the current fb. */ + __u32 fb_id; + + /** + * @possible_crtcs: Bitmask of CRTC's compatible with the plane. CRTC's + * are created and they receive an index, which corresponds to their + * position in the bitmask. Bit N corresponds to + * :ref:`CRTC index` N. + */ + __u32 possible_crtcs; + /** @gamma_size: Never used. */ + __u32 gamma_size; + + /** @count_format_types: Number of formats. */ + __u32 count_format_types; + /** + * @format_type_ptr: Pointer to ``__u32`` array of formats that are + * supported by the plane. These formats do not require modifiers. + */ + __u64 format_type_ptr; +}; + +struct drm_mode_get_plane_res { + __u64 plane_id_ptr; + __u32 count_planes; +}; + +#define DRM_MODE_ENCODER_NONE 0 +#define DRM_MODE_ENCODER_DAC 1 +#define DRM_MODE_ENCODER_TMDS 2 +#define DRM_MODE_ENCODER_LVDS 3 +#define DRM_MODE_ENCODER_TVDAC 4 +#define DRM_MODE_ENCODER_VIRTUAL 5 +#define DRM_MODE_ENCODER_DSI 6 +#define DRM_MODE_ENCODER_DPMST 7 +#define DRM_MODE_ENCODER_DPI 8 + +struct drm_mode_get_encoder { + __u32 encoder_id; + __u32 encoder_type; + + __u32 crtc_id; /**< Id of crtc */ + + __u32 possible_crtcs; + __u32 possible_clones; +}; + +/* This is for connectors with multiple signal types. */ +/* Try to match DRM_MODE_CONNECTOR_X as closely as possible. */ +enum drm_mode_subconnector { + DRM_MODE_SUBCONNECTOR_Automatic = 0, /* DVI-I, TV */ + DRM_MODE_SUBCONNECTOR_Unknown = 0, /* DVI-I, TV, DP */ + DRM_MODE_SUBCONNECTOR_VGA = 1, /* DP */ + DRM_MODE_SUBCONNECTOR_DVID = 3, /* DVI-I DP */ + DRM_MODE_SUBCONNECTOR_DVIA = 4, /* DVI-I */ + DRM_MODE_SUBCONNECTOR_Composite = 5, /* TV */ + DRM_MODE_SUBCONNECTOR_SVIDEO = 6, /* TV */ + DRM_MODE_SUBCONNECTOR_Component = 8, /* TV */ + DRM_MODE_SUBCONNECTOR_SCART = 9, /* TV */ + DRM_MODE_SUBCONNECTOR_DisplayPort = 10, /* DP */ + DRM_MODE_SUBCONNECTOR_HDMIA = 11, /* DP */ + DRM_MODE_SUBCONNECTOR_Native = 15, /* DP */ + DRM_MODE_SUBCONNECTOR_Wireless = 18, /* DP */ +}; + +#define DRM_MODE_CONNECTOR_Unknown 0 +#define DRM_MODE_CONNECTOR_VGA 1 +#define DRM_MODE_CONNECTOR_DVII 2 +#define DRM_MODE_CONNECTOR_DVID 3 +#define DRM_MODE_CONNECTOR_DVIA 4 +#define DRM_MODE_CONNECTOR_Composite 5 +#define DRM_MODE_CONNECTOR_SVIDEO 6 +#define DRM_MODE_CONNECTOR_LVDS 7 +#define DRM_MODE_CONNECTOR_Component 8 +#define DRM_MODE_CONNECTOR_9PinDIN 9 +#define DRM_MODE_CONNECTOR_DisplayPort 10 +#define DRM_MODE_CONNECTOR_HDMIA 11 +#define DRM_MODE_CONNECTOR_HDMIB 12 +#define DRM_MODE_CONNECTOR_TV 13 +#define DRM_MODE_CONNECTOR_eDP 14 +#define DRM_MODE_CONNECTOR_VIRTUAL 15 +#define DRM_MODE_CONNECTOR_DSI 16 +#define DRM_MODE_CONNECTOR_DPI 17 +#define DRM_MODE_CONNECTOR_WRITEBACK 18 +#define DRM_MODE_CONNECTOR_SPI 19 +#define DRM_MODE_CONNECTOR_USB 20 + +/** + * struct drm_mode_get_connector - Get connector metadata. + * + * User-space can perform a GETCONNECTOR ioctl to retrieve information about a + * connector. User-space is expected to retrieve encoders, modes and properties + * by performing this ioctl at least twice: the first time to retrieve the + * number of elements, the second time to retrieve the elements themselves. + * + * To retrieve the number of elements, set @count_props and @count_encoders to + * zero, set @count_modes to 1, and set @modes_ptr to a temporary struct + * drm_mode_modeinfo element. 
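+ *
+ * As a hedged illustration of that first call (an open DRM fd and
+ * libdrm's drmIoctl() wrapper are assumed)::
+ *
+ *     struct drm_mode_modeinfo probe_mode;
+ *     struct drm_mode_get_connector conn = {
+ *             .connector_id = connector_id,
+ *             .count_modes = 1,
+ *             .modes_ptr = (__u64)(uintptr_t)&probe_mode,
+ *     };
+ *
+ *     drmIoctl(fd, DRM_IOCTL_MODE_GETCONNECTOR, &conn);
+ *     /* conn.count_modes/count_props/count_encoders now hold totals */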
+ * + * To retrieve the elements, allocate arrays for @encoders_ptr, @modes_ptr, + * @props_ptr and @prop_values_ptr, then set @count_modes, @count_props and + * @count_encoders to their capacity. + * + * Performing the ioctl only twice may be racy: the number of elements may have + * changed with a hotplug event in-between the two ioctls. User-space is + * expected to retry the last ioctl until the number of elements stabilizes. + * The kernel won't fill any array which doesn't have the expected length. + * + * **Force-probing a connector** + * + * If the @count_modes field is set to zero and the DRM client is the current + * DRM master, the kernel will perform a forced probe on the connector to + * refresh the connector status, modes and EDID. A forced-probe can be slow, + * might cause flickering and the ioctl will block. + * + * User-space needs to force-probe connectors to ensure their metadata is + * up-to-date at startup and after receiving a hot-plug event. User-space + * may perform a forced-probe when the user explicitly requests it. User-space + * shouldn't perform a forced-probe in other situations. + */ +struct drm_mode_get_connector { + /** @encoders_ptr: Pointer to ``__u32`` array of object IDs. */ + __u64 encoders_ptr; + /** @modes_ptr: Pointer to struct drm_mode_modeinfo array. */ + __u64 modes_ptr; + /** @props_ptr: Pointer to ``__u32`` array of property IDs. */ + __u64 props_ptr; + /** @prop_values_ptr: Pointer to ``__u64`` array of property values. */ + __u64 prop_values_ptr; + + /** @count_modes: Number of modes. */ + __u32 count_modes; + /** @count_props: Number of properties. */ + __u32 count_props; + /** @count_encoders: Number of encoders. */ + __u32 count_encoders; + + /** @encoder_id: Object ID of the current encoder. */ + __u32 encoder_id; + /** @connector_id: Object ID of the connector. */ + __u32 connector_id; + /** + * @connector_type: Type of the connector. + * + * See DRM_MODE_CONNECTOR_* defines. + */ + __u32 connector_type; + /** + * @connector_type_id: Type-specific connector number. + * + * This is not an object ID. This is a per-type connector number. Each + * (type, type_id) combination is unique across all connectors of a DRM + * device. + * + * The (type, type_id) combination is not a stable identifier: the + * type_id can change depending on the driver probe order. + */ + __u32 connector_type_id; + + /** + * @connection: Status of the connector. + * + * See enum drm_connector_status. + */ + __u32 connection; + /** @mm_width: Width of the connected sink in millimeters. */ + __u32 mm_width; + /** @mm_height: Height of the connected sink in millimeters. */ + __u32 mm_height; + /** + * @subpixel: Subpixel order of the connected sink. + * + * See enum subpixel_order. + */ + __u32 subpixel; + + /** @pad: Padding, must be zero. */ + __u32 pad; +}; + +#define DRM_MODE_PROP_PENDING (1<<0) /* deprecated, do not use */ +#define DRM_MODE_PROP_RANGE (1<<1) +#define DRM_MODE_PROP_IMMUTABLE (1<<2) +#define DRM_MODE_PROP_ENUM (1<<3) /* enumerated type with text strings */ +#define DRM_MODE_PROP_BLOB (1<<4) +#define DRM_MODE_PROP_BITMASK (1<<5) /* bitmask of enumerated types */ + +/* non-extended types: legacy bitmask, one bit per type: */ +#define DRM_MODE_PROP_LEGACY_TYPE ( \ + DRM_MODE_PROP_RANGE | \ + DRM_MODE_PROP_ENUM | \ + DRM_MODE_PROP_BLOB | \ + DRM_MODE_PROP_BITMASK) + +/* extended-types: rather than continue to consume a bit per type, + * grab a chunk of the bits to use as integer type id. 
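+ *
+ * For example, to test whether a property is an object property
+ * (illustrative only)::
+ *
+ *     int is_object = (flags & DRM_MODE_PROP_EXTENDED_TYPE) ==
+ *                     DRM_MODE_PROP_OBJECT;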
+ */ +#define DRM_MODE_PROP_EXTENDED_TYPE 0x0000ffc0 +#define DRM_MODE_PROP_TYPE(n) ((n) << 6) +#define DRM_MODE_PROP_OBJECT DRM_MODE_PROP_TYPE(1) +#define DRM_MODE_PROP_SIGNED_RANGE DRM_MODE_PROP_TYPE(2) + +/* the PROP_ATOMIC flag is used to hide properties from userspace that + * is not aware of atomic properties. This is mostly to work around + * older userspace (DDX drivers) that read/write each prop they find, + * without being aware that this could be triggering a lengthy modeset. + */ +#define DRM_MODE_PROP_ATOMIC 0x80000000 + +/** + * struct drm_mode_property_enum - Description for an enum/bitfield entry. + * @value: numeric value for this enum entry. + * @name: symbolic name for this enum entry. + * + * See struct drm_property_enum for details. + */ +struct drm_mode_property_enum { + __u64 value; + char name[DRM_PROP_NAME_LEN]; +}; + +/** + * struct drm_mode_get_property - Get property metadata. + * + * User-space can perform a GETPROPERTY ioctl to retrieve information about a + * property. The same property may be attached to multiple objects, see + * "Modeset Base Object Abstraction". + * + * The meaning of the @values_ptr field changes depending on the property type. + * See &drm_property.flags for more details. + * + * The @enum_blob_ptr and @count_enum_blobs fields are only meaningful when the + * property has the type &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK. For + * backwards compatibility, the kernel will always set @count_enum_blobs to + * zero when the property has the type &DRM_MODE_PROP_BLOB. User-space must + * ignore these two fields if the property has a different type. + * + * User-space is expected to retrieve values and enums by performing this ioctl + * at least twice: the first time to retrieve the number of elements, the + * second time to retrieve the elements themselves. + * + * To retrieve the number of elements, set @count_values and @count_enum_blobs + * to zero, then call the ioctl. @count_values will be updated with the number + * of elements. If the property has the type &DRM_MODE_PROP_ENUM or + * &DRM_MODE_PROP_BITMASK, @count_enum_blobs will be updated as well. + * + * To retrieve the elements themselves, allocate an array for @values_ptr and + * set @count_values to its capacity. If the property has the type + * &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK, allocate an array for + * @enum_blob_ptr and set @count_enum_blobs to its capacity. Calling the ioctl + * again will fill the arrays. + */ +struct drm_mode_get_property { + /** @values_ptr: Pointer to a ``__u64`` array. */ + __u64 values_ptr; + /** @enum_blob_ptr: Pointer to a struct drm_mode_property_enum array. */ + __u64 enum_blob_ptr; + + /** + * @prop_id: Object ID of the property which should be retrieved. Set + * by the caller. + */ + __u32 prop_id; + /** + * @flags: ``DRM_MODE_PROP_*`` bitfield. See &drm_property.flags for + * a definition of the flags. + */ + __u32 flags; + /** + * @name: Symbolic property name. User-space should use this field to + * recognize properties. + */ + char name[DRM_PROP_NAME_LEN]; + + /** @count_values: Number of elements in @values_ptr. */ + __u32 count_values; + /** @count_enum_blobs: Number of elements in @enum_blob_ptr. 
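+ *
+ * A minimal sketch of that sequence (illustrative only; an open DRM
+ * fd and libdrm's drmIoctl() are assumed, allocation checks omitted)::
+ *
+ *     struct drm_mode_get_property prop = { .prop_id = prop_id };
+ *
+ *     drmIoctl(fd, DRM_IOCTL_MODE_GETPROPERTY, &prop); /* counts */
+ *     prop.values_ptr = (__u64)(uintptr_t)calloc(prop.count_values,
+ *                                                sizeof(__u64));
+ *     drmIoctl(fd, DRM_IOCTL_MODE_GETPROPERTY, &prop); /* values */
+ *
+ * For enum/bitmask properties an array for @enum_blob_ptr has to be
+ * allocated the same way before the second call.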
*/ + __u32 count_enum_blobs; +}; + +struct drm_mode_connector_set_property { + __u64 value; + __u32 prop_id; + __u32 connector_id; +}; + +#define DRM_MODE_OBJECT_CRTC 0xcccccccc +#define DRM_MODE_OBJECT_CONNECTOR 0xc0c0c0c0 +#define DRM_MODE_OBJECT_ENCODER 0xe0e0e0e0 +#define DRM_MODE_OBJECT_MODE 0xdededede +#define DRM_MODE_OBJECT_PROPERTY 0xb0b0b0b0 +#define DRM_MODE_OBJECT_FB 0xfbfbfbfb +#define DRM_MODE_OBJECT_BLOB 0xbbbbbbbb +#define DRM_MODE_OBJECT_PLANE 0xeeeeeeee +#define DRM_MODE_OBJECT_ANY 0 + +struct drm_mode_obj_get_properties { + __u64 props_ptr; + __u64 prop_values_ptr; + __u32 count_props; + __u32 obj_id; + __u32 obj_type; +}; + +struct drm_mode_obj_set_property { + __u64 value; + __u32 prop_id; + __u32 obj_id; + __u32 obj_type; +}; + +struct drm_mode_get_blob { + __u32 blob_id; + __u32 length; + __u64 data; +}; + +struct drm_mode_fb_cmd { + __u32 fb_id; + __u32 width; + __u32 height; + __u32 pitch; + __u32 bpp; + __u32 depth; + /* driver specific handle */ + __u32 handle; +}; + +#define DRM_MODE_FB_INTERLACED (1<<0) /* for interlaced framebuffers */ +#define DRM_MODE_FB_MODIFIERS (1<<1) /* enables ->modifier[] */ + +/** + * struct drm_mode_fb_cmd2 - Frame-buffer metadata. + * + * This struct holds frame-buffer metadata. There are two ways to use it: + * + * - User-space can fill this struct and perform a &DRM_IOCTL_MODE_ADDFB2 + * ioctl to register a new frame-buffer. The new frame-buffer object ID will + * be set by the kernel in @fb_id. + * - User-space can set @fb_id and perform a &DRM_IOCTL_MODE_GETFB2 ioctl to + * fetch metadata about an existing frame-buffer. + * + * In case of planar formats, this struct allows up to 4 buffer objects with + * offsets and pitches per plane. The pitch and offset order are dictated by + * the format FourCC as defined by ``drm_fourcc.h``, e.g. NV12 is described as: + * + * YUV 4:2:0 image with a plane of 8-bit Y samples followed by an + * interleaved U/V plane containing 8-bit 2x2 subsampled colour difference + * samples. + * + * So it would consist of a Y plane at ``offsets[0]`` and a UV plane at + * ``offsets[1]``. + * + * To accommodate tiled, compressed, etc formats, a modifier can be specified. + * For more information see the "Format Modifiers" section. Note that even + * though it looks like we have a modifier per-plane, we in fact do not. The + * modifier for each plane must be identical. Thus all combinations of + * different data layouts for multi-plane formats must be enumerated as + * separate modifiers. + * + * All of the entries in @handles, @pitches, @offsets and @modifier must be + * zero when unused. Warning, for @offsets and @modifier zero can't be used to + * figure out whether the entry is used or not since it's a valid value (a zero + * offset is common, and a zero modifier is &DRM_FORMAT_MOD_LINEAR). + */ +struct drm_mode_fb_cmd2 { + /** @fb_id: Object ID of the frame-buffer. */ + __u32 fb_id; + /** @width: Width of the frame-buffer. */ + __u32 width; + /** @height: Height of the frame-buffer. */ + __u32 height; + /** + * @pixel_format: FourCC format code, see ``DRM_FORMAT_*`` constants in + * ``drm_fourcc.h``. + */ + __u32 pixel_format; + /** + * @flags: Frame-buffer flags (see &DRM_MODE_FB_INTERLACED and + * &DRM_MODE_FB_MODIFIERS). + */ + __u32 flags; + + /** + * @handles: GEM buffer handle, one per plane. Set to 0 if the plane is + * unused. The same handle can be used for multiple planes. + */ + __u32 handles[4]; + /** @pitches: Pitch (aka. stride) in bytes, one per plane. 
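+ *
+ * For the NV12 example above this means pitches[0] is the stride of
+ * the Y plane and pitches[1] the stride of the interleaved UV plane,
+ * e.g. (an illustration ignoring driver alignment constraints)::
+ *
+ *     __u32 pitches[4] = { width, width, 0, 0 };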
*/
+ __u32 pitches[4];
+ /** @offsets: Offset into the buffer in bytes, one per plane. */
+ __u32 offsets[4];
+ /**
+ * @modifier: Format modifier, one per plane. See ``DRM_FORMAT_MOD_*``
+ * constants in ``drm_fourcc.h``. All planes must use the same
+ * modifier. Ignored unless &DRM_MODE_FB_MODIFIERS is set in @flags.
+ */
+ __u64 modifier[4];
+};
+
+#define DRM_MODE_FB_DIRTY_ANNOTATE_COPY 0x01
+#define DRM_MODE_FB_DIRTY_ANNOTATE_FILL 0x02
+#define DRM_MODE_FB_DIRTY_FLAGS 0x03
+
+#define DRM_MODE_FB_DIRTY_MAX_CLIPS 256
+
+/*
+ * Mark a region of a framebuffer as dirty.
+ *
+ * Some hardware does not automatically update display contents
+ * when hardware or software draws to a framebuffer. This ioctl
+ * allows userspace to tell the kernel and the hardware what
+ * regions of the framebuffer have changed.
+ *
+ * The kernel or hardware is free to update more than just the
+ * region specified by the clip rects. The kernel or hardware
+ * may also delay and/or coalesce several calls to dirty into a
+ * single update.
+ *
+ * Userspace may annotate the updates; the annotations are a
+ * promise made by the caller that the change is either a copy
+ * of pixels or a fill of a single color in the region specified.
+ *
+ * If the DRM_MODE_FB_DIRTY_ANNOTATE_COPY flag is given then
+ * the number of updated regions is half of the num_clips given,
+ * where the clip rects are paired in src and dst. The width and
+ * height of each one of the pairs must match.
+ *
+ * If the DRM_MODE_FB_DIRTY_ANNOTATE_FILL flag is given the caller
+ * promises that the region specified by the clip rects is filled
+ * completely with a single color as given in the color argument.
+ */
+
+struct drm_mode_fb_dirty_cmd {
+ __u32 fb_id;
+ __u32 flags;
+ __u32 color;
+ __u32 num_clips;
+ __u64 clips_ptr;
+};
+
+struct drm_mode_mode_cmd {
+ __u32 connector_id;
+ struct drm_mode_modeinfo mode;
+};
+
+#define DRM_MODE_CURSOR_BO 0x01
+#define DRM_MODE_CURSOR_MOVE 0x02
+#define DRM_MODE_CURSOR_FLAGS 0x03
+
+/*
+ * depending on the value in flags different members are used.
+ *
+ * CURSOR_BO uses
+ * crtc_id
+ * width
+ * height
+ * handle - if 0 turns the cursor off
+ *
+ * CURSOR_MOVE uses
+ * crtc_id
+ * x
+ * y
+ */
+struct drm_mode_cursor {
+ __u32 flags;
+ __u32 crtc_id;
+ __s32 x;
+ __s32 y;
+ __u32 width;
+ __u32 height;
+ /* driver specific handle */
+ __u32 handle;
+};
+
+struct drm_mode_cursor2 {
+ __u32 flags;
+ __u32 crtc_id;
+ __s32 x;
+ __s32 y;
+ __u32 width;
+ __u32 height;
+ /* driver specific handle */
+ __u32 handle;
+ __s32 hot_x;
+ __s32 hot_y;
+};
+
+struct drm_mode_crtc_lut {
+ __u32 crtc_id;
+ __u32 gamma_size;
+
+ /* pointers to arrays */
+ __u64 red;
+ __u64 green;
+ __u64 blue;
+};
+
+struct drm_color_ctm {
+ /*
+ * Conversion matrix in S31.32 sign-magnitude
+ * (not two's complement!) format.
+ *
+ * out matrix in
+ * |R| |0 1 2| |R|
+ * |G| = |3 4 5| x |G|
+ * |B| |6 7 8| |B|
+ */
+ __u64 matrix[9];
+};
+
+struct drm_color_lut {
+ /*
+ * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and
+ * 0xffff == 1.0.
+ */
+ __u16 red;
+ __u16 green;
+ __u16 blue;
+ __u16 reserved;
+};
+
+/**
+ * struct drm_plane_size_hint - Plane size hints
+ * @width: The width of the plane in pixels
+ * @height: The height of the plane in pixels
+ *
+ * The plane SIZE_HINTS property blob contains an
+ * array of struct drm_plane_size_hint.
+ */
+struct drm_plane_size_hint {
+ __u16 width;
+ __u16 height;
+};
+
+/**
+ * struct hdr_metadata_infoframe - HDR Metadata Infoframe Data.
+ * + * HDR Metadata Infoframe as per CTA 861.G spec. This is expected + * to match exactly with the spec. + * + * Userspace is expected to pass the metadata information as per + * the format described in this structure. + */ +struct hdr_metadata_infoframe { + /** + * @eotf: Electro-Optical Transfer Function (EOTF) + * used in the stream. + */ + __u8 eotf; + /** + * @metadata_type: Static_Metadata_Descriptor_ID. + */ + __u8 metadata_type; + /** + * @display_primaries: Color Primaries of the Data. + * These are coded as unsigned 16-bit values in units of + * 0.00002, where 0x0000 represents zero and 0xC350 + * represents 1.0000. + * @display_primaries.x: X coordinate of color primary. + * @display_primaries.y: Y coordinate of color primary. + */ + struct { + __u16 x, y; + } display_primaries[3]; + /** + * @white_point: White Point of Colorspace Data. + * These are coded as unsigned 16-bit values in units of + * 0.00002, where 0x0000 represents zero and 0xC350 + * represents 1.0000. + * @white_point.x: X coordinate of whitepoint of color primary. + * @white_point.y: Y coordinate of whitepoint of color primary. + */ + struct { + __u16 x, y; + } white_point; + /** + * @max_display_mastering_luminance: Max Mastering Display Luminance. + * This value is coded as an unsigned 16-bit value in units of 1 cd/m2, + * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2. + */ + __u16 max_display_mastering_luminance; + /** + * @min_display_mastering_luminance: Min Mastering Display Luminance. + * This value is coded as an unsigned 16-bit value in units of + * 0.0001 cd/m2, where 0x0001 represents 0.0001 cd/m2 and 0xFFFF + * represents 6.5535 cd/m2. + */ + __u16 min_display_mastering_luminance; + /** + * @max_cll: Max Content Light Level. + * This value is coded as an unsigned 16-bit value in units of 1 cd/m2, + * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2. + */ + __u16 max_cll; + /** + * @max_fall: Max Frame Average Light Level. + * This value is coded as an unsigned 16-bit value in units of 1 cd/m2, + * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2. + */ + __u16 max_fall; +}; + +/** + * struct hdr_output_metadata - HDR output metadata + * + * Metadata Information to be passed from userspace + */ +struct hdr_output_metadata { + /** + * @metadata_type: Static_Metadata_Descriptor_ID. + */ + __u32 metadata_type; + /** + * @hdmi_metadata_type1: HDR Metadata Infoframe. + */ + union { + struct hdr_metadata_infoframe hdmi_metadata_type1; + }; +}; + +/** + * DRM_MODE_PAGE_FLIP_EVENT + * + * Request that the kernel sends back a vblank event (see + * struct drm_event_vblank) with the &DRM_EVENT_FLIP_COMPLETE type when the + * page-flip is done. + */ +#define DRM_MODE_PAGE_FLIP_EVENT 0x01 +/** + * DRM_MODE_PAGE_FLIP_ASYNC + * + * Request that the page-flip is performed as soon as possible, ie. with no + * delay due to waiting for vblank. This may cause tearing to be visible on + * the screen. + * + * When used with atomic uAPI, the driver will return an error if the hardware + * doesn't support performing an asynchronous page-flip for this update. + * User-space should handle this, e.g. by falling back to a regular page-flip. + * + * Note, some hardware might need to perform one last synchronous page-flip + * before being able to switch to asynchronous page-flips. As an exception, + * the driver will return success even though that first page-flip is not + * asynchronous. 
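+ *
+ * A hedged sketch of that fallback for the atomic uAPI (drmIoctl()
+ * from libdrm and a populated struct drm_mode_atomic are assumed)::
+ *
+ *     atomic.flags |= DRM_MODE_PAGE_FLIP_ASYNC;
+ *     if (drmIoctl(fd, DRM_IOCTL_MODE_ATOMIC, &atomic) == -1) {
+ *             /* e.g. fall back to a regular page-flip */
+ *             atomic.flags &= ~DRM_MODE_PAGE_FLIP_ASYNC;
+ *             drmIoctl(fd, DRM_IOCTL_MODE_ATOMIC, &atomic);
+ *     }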
+ */
+#define DRM_MODE_PAGE_FLIP_ASYNC 0x02
+#define DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE 0x4
+#define DRM_MODE_PAGE_FLIP_TARGET_RELATIVE 0x8
+#define DRM_MODE_PAGE_FLIP_TARGET (DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE | \
+ DRM_MODE_PAGE_FLIP_TARGET_RELATIVE)
+/**
+ * DRM_MODE_PAGE_FLIP_FLAGS
+ *
+ * Bitmask of flags suitable for &drm_mode_crtc_page_flip_target.flags.
+ */
+#define DRM_MODE_PAGE_FLIP_FLAGS (DRM_MODE_PAGE_FLIP_EVENT | \
+ DRM_MODE_PAGE_FLIP_ASYNC | \
+ DRM_MODE_PAGE_FLIP_TARGET)
+
+/*
+ * Request a page flip on the specified crtc.
+ *
+ * This ioctl will ask KMS to schedule a page flip for the specified
+ * crtc. Once any pending rendering targeting the specified fb (as of
+ * ioctl time) has completed, the crtc will be reprogrammed to display
+ * that fb after the next vertical refresh. The ioctl returns
+ * immediately, but subsequent rendering to the current fb will block
+ * in the execbuffer ioctl until the page flip happens. If a page
+ * flip is already pending as the ioctl is called, EBUSY will be
+ * returned.
+ *
+ * Flag DRM_MODE_PAGE_FLIP_EVENT requests that drm sends back a vblank
+ * event (see drm.h: struct drm_event_vblank) when the page flip is
+ * done. The user_data field passed in with this ioctl will be
+ * returned as the user_data field in the vblank event struct.
+ *
+ * Flag DRM_MODE_PAGE_FLIP_ASYNC requests that the flip happen
+ * 'as soon as possible', meaning that it does not delay waiting for
+ * vblank. This may cause tearing on the screen.
+ *
+ * The reserved field must be zero.
+ */
+
+struct drm_mode_crtc_page_flip {
+ __u32 crtc_id;
+ __u32 fb_id;
+ __u32 flags;
+ __u32 reserved;
+ __u64 user_data;
+};
+
+/*
+ * Request a page flip on the specified crtc.
+ *
+ * Same as struct drm_mode_crtc_page_flip, but supports new flags and
+ * re-purposes the reserved field:
+ *
+ * The sequence field must be zero unless either of the
+ * DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags is specified. When
+ * the ABSOLUTE flag is specified, the sequence field denotes the absolute
+ * vblank sequence when the flip should take effect. When the RELATIVE
+ * flag is specified, the sequence field denotes the relative (to the
+ * current one when the ioctl is called) vblank sequence when the flip
+ * should take effect. NOTE: DRM_IOCTL_WAIT_VBLANK must still be used to
+ * make sure the vblank sequence before the target one has passed before
+ * calling this ioctl. The purpose of the
+ * DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags is merely to clarify
+ * the target for when code dealing with a page flip runs during a
+ * vertical blank period.
+ */
+
+struct drm_mode_crtc_page_flip_target {
+ __u32 crtc_id;
+ __u32 fb_id;
+ __u32 flags;
+ __u32 sequence;
+ __u64 user_data;
+};
+
+/**
+ * struct drm_mode_create_dumb - Create a KMS dumb buffer for scanout.
+ * @height: buffer height in pixels
+ * @width: buffer width in pixels
+ * @bpp: bits per pixel
+ * @flags: must be zero
+ * @handle: buffer object handle
+ * @pitch: number of bytes between two consecutive lines
+ * @size: size of the whole buffer in bytes
+ *
+ * User-space fills @height, @width, @bpp and @flags. If the IOCTL succeeds,
+ * the kernel fills @handle, @pitch and @size.
+ */
+struct drm_mode_create_dumb {
+ __u32 height;
+ __u32 width;
+ __u32 bpp;
+ __u32 flags;
+
+ __u32 handle;
+ __u32 pitch;
+ __u64 size;
+};
+
+/* set up for mmap of a dumb scanout buffer */
+struct drm_mode_map_dumb {
+ /** Handle for the object being mapped.
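+ *
+ * An illustrative end-to-end dumb-buffer sketch (an open DRM fd and
+ * libdrm's drmIoctl() are assumed; error handling omitted)::
+ *
+ *     struct drm_mode_create_dumb creq = {
+ *             .width = 1024, .height = 768, .bpp = 32,
+ *     };
+ *     struct drm_mode_map_dumb mreq = { 0 };
+ *     void *map;
+ *
+ *     drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &creq);
+ *     mreq.handle = creq.handle;
+ *     drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &mreq);
+ *     map = mmap(NULL, creq.size, PROT_READ | PROT_WRITE,
+ *                MAP_SHARED, fd, mreq.offset);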
*/
+ __u32 handle;
+ __u32 pad;
+ /**
+ * Fake offset to use for subsequent mmap call
+ *
+ * This is a fixed-size type for 32/64 compatibility.
+ */
+ __u64 offset;
+};
+
+struct drm_mode_destroy_dumb {
+ __u32 handle;
+};
+
+/**
+ * DRM_MODE_ATOMIC_TEST_ONLY
+ *
+ * Do not apply the atomic commit, instead check whether the hardware supports
+ * this configuration.
+ *
+ * See &drm_mode_config_funcs.atomic_check for more details on test-only
+ * commits.
+ */
+#define DRM_MODE_ATOMIC_TEST_ONLY 0x0100
+/**
+ * DRM_MODE_ATOMIC_NONBLOCK
+ *
+ * Do not block while applying the atomic commit. The &DRM_IOCTL_MODE_ATOMIC
+ * IOCTL returns immediately instead of waiting for the changes to be applied
+ * in hardware. Note, the driver will still check that the update can be
+ * applied before returning.
+ */
+#define DRM_MODE_ATOMIC_NONBLOCK 0x0200
+/**
+ * DRM_MODE_ATOMIC_ALLOW_MODESET
+ *
+ * Allow the update to result in temporary or transient visible artifacts while
+ * the update is being applied. Applying the update may also take significantly
+ * more time than a page flip. All visual artifacts will disappear by the time
+ * the update is completed, as signalled through the vblank event's timestamp
+ * (see struct drm_event_vblank).
+ *
+ * This flag must be set when the KMS update might cause visible artifacts.
+ * Without this flag such a KMS update will return an EINVAL error. What kind
+ * of update may cause visible artifacts depends on the driver and the
+ * hardware. User-space that needs to know beforehand if an update might cause
+ * visible artifacts can use &DRM_MODE_ATOMIC_TEST_ONLY without
+ * &DRM_MODE_ATOMIC_ALLOW_MODESET to see if it fails.
+ *
+ * To the best of the driver's knowledge, visual artifacts are guaranteed to
+ * not appear when this flag is not set. Some sinks might display visual
+ * artifacts outside of the driver's control.
+ */
+#define DRM_MODE_ATOMIC_ALLOW_MODESET 0x0400
+
+/**
+ * DRM_MODE_ATOMIC_FLAGS
+ *
+ * Bitfield of flags accepted by the &DRM_IOCTL_MODE_ATOMIC IOCTL in
+ * &drm_mode_atomic.flags.
+ */
+#define DRM_MODE_ATOMIC_FLAGS (\
+ DRM_MODE_PAGE_FLIP_EVENT |\
+ DRM_MODE_PAGE_FLIP_ASYNC |\
+ DRM_MODE_ATOMIC_TEST_ONLY |\
+ DRM_MODE_ATOMIC_NONBLOCK |\
+ DRM_MODE_ATOMIC_ALLOW_MODESET)
+
+struct drm_mode_atomic {
+ __u32 flags;
+ __u32 count_objs;
+ __u64 objs_ptr;
+ __u64 count_props_ptr;
+ __u64 props_ptr;
+ __u64 prop_values_ptr;
+ __u64 reserved;
+ __u64 user_data;
+};
+
+struct drm_format_modifier_blob {
+#define FORMAT_BLOB_CURRENT 1
+ /* Version of this blob format */
+ __u32 version;
+
+ /* Flags */
+ __u32 flags;
+
+ /* Number of fourcc formats supported */
+ __u32 count_formats;
+
+ /* Where in this blob the formats exist (in bytes) */
+ __u32 formats_offset;
+
+ /* Number of drm_format_modifiers */
+ __u32 count_modifiers;
+
+ /* Where in this blob the modifiers exist (in bytes) */
+ __u32 modifiers_offset;
+
+ /* __u32 formats[] */
+ /* struct drm_format_modifier modifiers[] */
+};
+
+struct drm_format_modifier {
+ /* Bitmask of formats in get_plane format list this info applies to. The
+ * offset allows a sliding window of which 64 formats (bits).
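+ * That is, bit N in @formats corresponds to format index (@offset + N)
+ * in the plane's format list, so more than 64 formats can be described
+ * by several entries with increasing offsets.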
+ *
+ * Some examples:
+ * In today's world with < 65 formats, where formats 0 and 2 are
+ * supported:
+ * 0x0000000000000005
+ * ^-offset = 0, formats = 5
+ *
+ * If the number of formats grew to 128, and formats 98-102 are
+ * supported with the modifier:
+ *
+ * 0x0000007c00000000 0000000000000000
+ * ^
+ * |__offset = 64, formats = 0x7c00000000
+ *
+ */
+ __u64 formats;
+ __u32 offset;
+ __u32 pad;
+
+ /* The modifier that applies to the get_plane format list bitmask. */
+ __u64 modifier;
+};
+
+/**
+ * struct drm_mode_create_blob - Create New blob property
+ *
+ * Create a new 'blob' data property, copying length bytes from data pointer,
+ * and returning a new blob ID.
+ */
+struct drm_mode_create_blob {
+ /** @data: Pointer to data to copy. */
+ __u64 data;
+ /** @length: Length of data to copy. */
+ __u32 length;
+ /** @blob_id: Return: new property ID. */
+ __u32 blob_id;
+};
+
+/**
+ * struct drm_mode_destroy_blob - Destroy user blob
+ * @blob_id: blob_id to destroy
+ *
+ * Destroy a user-created blob property.
+ *
+ * User-space can release blobs as soon as they do not need to refer to them by
+ * their blob object ID. For instance, if you are using a MODE_ID blob in an
+ * atomic commit and you will not make another commit re-using the same ID, you
+ * can destroy the blob as soon as the commit has been issued, without waiting
+ * for it to complete.
+ */
+struct drm_mode_destroy_blob {
+ __u32 blob_id;
+};
+
+/**
+ * struct drm_mode_create_lease - Create lease
+ *
+ * Lease mode resources, creating another drm_master.
+ *
+ * The @object_ids array must reference at least one CRTC, one connector and
+ * one plane if &DRM_CLIENT_CAP_UNIVERSAL_PLANES is enabled. Alternatively,
+ * the lease can be completely empty.
+ */
+struct drm_mode_create_lease {
+ /** @object_ids: Pointer to array of object ids (__u32) */
+ __u64 object_ids;
+ /** @object_count: Number of object ids */
+ __u32 object_count;
+ /** @flags: flags for new FD (O_CLOEXEC, etc) */
+ __u32 flags;
+
+ /** @lessee_id: Return: unique identifier for lessee. */
+ __u32 lessee_id;
+ /** @fd: Return: file descriptor to new drm_master file */
+ __u32 fd;
+};
+
+/**
+ * struct drm_mode_list_lessees - List lessees
+ *
+ * List lessees of a drm_master.
+ */
+struct drm_mode_list_lessees {
+ /**
+ * @count_lessees: Number of lessees.
+ *
+ * On input, provides length of the array.
+ * On output, provides total number. No
+ * more than the input number will be written
+ * back, so two calls can be used to get
+ * the size and then the data.
+ */
+ __u32 count_lessees;
+ /** @pad: Padding. */
+ __u32 pad;
+
+ /**
+ * @lessees_ptr: Pointer to lessees.
+ *
+ * Pointer to __u64 array of lessee ids
+ */
+ __u64 lessees_ptr;
+};
+
+/**
+ * struct drm_mode_get_lease - Get Lease
+ *
+ * Get leased objects.
+ */
+struct drm_mode_get_lease {
+ /**
+ * @count_objects: Number of leased objects.
+ *
+ * On input, provides length of the array.
+ * On output, provides total number. No
+ * more than the input number will be written
+ * back, so two calls can be used to get
+ * the size and then the data.
+ */
+ __u32 count_objects;
+ /** @pad: Padding. */
+ __u32 pad;
+
+ /**
+ * @objects_ptr: Pointer to objects.
+ *
+ * Pointer to __u32 array of object ids.
+ */
+ __u64 objects_ptr;
+};
+
+/**
+ * struct drm_mode_revoke_lease - Revoke lease
+ */
+struct drm_mode_revoke_lease {
+ /** @lessee_id: Unique ID of lessee */
+ __u32 lessee_id;
+};
+
+/**
+ * struct drm_mode_rect - Two dimensional rectangle.
+ * @x1: Horizontal starting coordinate (inclusive).
+ * @y1: Vertical starting coordinate (inclusive).
+ * @x2: Horizontal ending coordinate (exclusive).
+ * @y2: Vertical ending coordinate (exclusive).
+ *
+ * Since the drm subsystem uses struct drm_rect to manage rectangular
+ * areas, this exports it to user-space.
+ *
+ * Currently used by drm_mode_atomic blob property FB_DAMAGE_CLIPS.
+ */
+struct drm_mode_rect {
+ __s32 x1;
+ __s32 y1;
+ __s32 x2;
+ __s32 y2;
+};
+
+/**
+ * struct drm_mode_closefb
+ * @fb_id: Framebuffer ID.
+ * @pad: Must be zero.
+ */
+struct drm_mode_closefb {
+ __u32 fb_id;
+ __u32 pad;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/plugins/amdgpu/kfd_ioctl.h b/plugins/amdgpu/kfd_ioctl.h
index 1a3bcea95..a63d453f0 100644
--- a/plugins/amdgpu/kfd_ioctl.h
+++ b/plugins/amdgpu/kfd_ioctl.h
@@ -23,9 +23,12 @@
 #ifndef KFD_IOCTL_H_INCLUDED
 #define KFD_IOCTL_H_INCLUDED
 
-#include <drm/drm.h>
 #include <linux/ioctl.h>
 
+/* Define __user as empty for kernel headers in user-space */
+#define __user
+#include "drm.h"
+
 /*
  * - 1.1 - initial version
  * - 1.3 - Add SMI events support
diff --git a/plugins/cuda/Makefile b/plugins/cuda/Makefile
index cc3d98ac9..2c1944a34 100644
--- a/plugins/cuda/Makefile
+++ b/plugins/cuda/Makefile
@@ -19,7 +19,7 @@ all: $(DEPS_CUDA)
 
 cuda_plugin.so: cuda_plugin.c
 	$(call msg-gen, $@)
-	$(Q) $(CC) $(PLUGIN_CFLAGS) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS)
+	$(Q) $(CC) $(PLUGIN_CFLAGS) $(DEFINES) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS)
 
 clean:
 	$(call msg-clean, $@)
diff --git a/scripts/ci/docker-test.sh b/scripts/ci/docker-test.sh
index bc5a74667..c1c745544 100755
--- a/scripts/ci/docker-test.sh
+++ b/scripts/ci/docker-test.sh
@@ -2,7 +2,7 @@
 
 set -x -e -o pipefail
 
-# Workaround: Docker 28.x has a known regression that breaks the checkpoint and
+# Workaround: Docker 28.x and 29.x have a known regression that breaks the checkpoint and
 # restore (C/R) feature. Let's install previous, or next major version. See
 # https://github.com/moby/moby/issues/50750 for details on the bug.
 export DEBIAN_FRONTEND=noninteractive
@@ -17,7 +17,7 @@ echo \
 	$(.
/etc/os-release && echo "${UBUNTU_CODENAME:-$VERSION_CODENAME}") stable" > /etc/apt/sources.list.d/docker.list apt update -y apt-cache madison docker-ce | awk '{ print $3 }' -verstr="$(apt-cache madison docker-ce | awk '{ print $3 }' | sort | grep -v ':28\.'| tail -n 1)" +verstr="$(apt-cache madison docker-ce | awk '{ print $3 }' | sort | grep -Ev ':(28|29)\.'| tail -n 1)" ../../contrib/apt-install -y "docker-ce=$verstr" "docker-ce-cli=$verstr" # docker checkpoint and restore is an experimental feature diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 465285f08..c95b4ef6a 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -79,6 +79,14 @@ define pkg-cflags $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" $(PKG_CONFIG) --cflags $(1)) endef +ifeq ($(GCS_ENABLE),1) + CFLAGS += -mbranch-protection=standard + LDFLAGS += -z experimental-gcs=check + TEST_ENV = GLIBC_TUNABLES=glibc.cpu.aarch64_gcs=1:glibc.cpu.aarch64_gcs_policy=2 +else + TEST_ENV = +endif + %.d: %.c $(E) " DEP " $@ $(Q)$(CC) $(CFLAGS) $(CPPFLAGS) -MM -MP $< -o $@ diff --git a/test/zdtm/lib/fs.c b/test/zdtm/lib/fs.c index bf8cd9cd3..efcc7a1d0 100644 --- a/test/zdtm/lib/fs.c +++ b/test/zdtm/lib/fs.c @@ -54,7 +54,7 @@ mnt_info_t *get_cwd_mnt_info(void) while (fgets(str, sizeof(str), f)) { char *hyphen = strchr(str, '-'); - ret = sscanf(str, "%i %i %u:%u %s %s", &mnt_id, &parent_mnt_id, &kmaj, &kmin, root, mountpoint); + ret = sscanf(str, "%i %i %u:%u %4095s %4095s", &mnt_id, &parent_mnt_id, &kmaj, &kmin, root, mountpoint); if (ret != 6 || !hyphen) goto err; ret = sscanf(hyphen + 1, " %ms", &fsname); diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index ea901a805..e1df2e5fa 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -520,30 +520,30 @@ install: all .PHONY: all install $(TST_NOFILE:%=%.pid): %.pid: % - $(