From 36e7999dc19a1a6e76258020a49f7f7836018e46 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 3 Dec 2018 14:24:33 -0800 Subject: drm/v3d: Document cache flushing ABI. Right now, userspace doesn't do any L2T writes, but we should lay out our expectations for how it works. v2: Explicitly mention the VCD cache flushing requirements and that we'll flush the other caches before each of the CLs. Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20181203222438.25417-1-eric@anholt.net Reviewed-by: Dave Emett --- include/uapi/drm/v3d_drm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 35c7d813c66e..ea70669d2138 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -52,6 +52,14 @@ extern "C" { * * This asks the kernel to have the GPU execute an optional binner * command list, and a render command list. + * + * The L1T, slice, L2C, L2T, and GCA caches will be flushed before + * each CL executes. The VCD cache should be flushed (if necessary) + * by the submitted CLs. The TLB writes are guaranteed to have been + * flushed by the time the render done IRQ happens, which is the + * trigger for out_sync. Any dirtying of cachelines by the job (only + * possible using TMU writes) must be flushed by the caller using the + * CL's cache flush commands. */ struct drm_v3d_submit_cl { /* Pointer to the binner command list. -- cgit v1.2.3 From 3affaa5a7ca3ca114e01049bc45e3ed4a3955505 Mon Sep 17 00:00:00 2001 From: Brian Starkey Date: Mon, 3 Dec 2018 11:31:57 +0000 Subject: drm/afbc: Add AFBC modifier usage documentation AFBC is a flexible, proprietary, lossless compression protocol and format, with a number of defined DRM format modifiers. To facilitate consistency and compatibility between different AFBC producers and consumers, document the expectations for usage of the AFBC DRM format modifiers in a new .rst chapter. Signed-off-by: Brian Starkey Reviewed-by: Liviu Dudau [Updated MAINTAINERS entry to show that "Mali DP Maintainers" is actually a mailing list and added an SPDX-License-Identifier to the documentation] Signed-off-by: Liviu Dudau --- include/uapi/drm/drm_fourcc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 0b44260a5ee9..df014ede4e57 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -572,6 +572,9 @@ extern "C" { * AFBC has several features which may be supported and/or used, which are * represented using bits in the modifier. Not all combinations are valid, * and different devices or use-cases may support different combinations. + * + * Further information on the use of AFBC modifiers can be found in + * Documentation/gpu/afbc.rst */ #define DRM_FORMAT_MOD_ARM_AFBC(__afbc_mode) fourcc_mod_code(ARM, __afbc_mode) -- cgit v1.2.3 From 2db8ebca1f6c158d0d439ebec4d4b9924c4c3d3c Mon Sep 17 00:00:00 2001 From: Matteo Franchin Date: Wed, 9 Jan 2019 15:58:37 +0000 Subject: drm/fourcc: Add modifier defininitions for AFBC 1.3 This commit adds definitions of format modifiers for version 1.3 of the Arm Framebuffer Compression (AFBC). Signed-off-by: Matteo Franchin Signed-off-by: Ayan Kumar Halder Reviewed-by: Brian Starkey Link: https://patchwork.freedesktop.org/patch/277333/ --- include/uapi/drm/drm_fourcc.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 0b44260a5ee9..41106c835747 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -581,10 +581,18 @@ extern "C" { * Indicates the superblock size(s) used for the AFBC buffer. The buffer * size (in pixels) must be aligned to a multiple of the superblock size. * Four lowest significant bits(LSBs) are reserved for block size. + * + * Where one superblock size is specified, it applies to all planes of the + * buffer (e.g. 16x16, 32x8). When multiple superblock sizes are specified, + * the first applies to the Luma plane and the second applies to the Chroma + * plane(s). e.g. (32x8_64x4 means 32x8 Luma, with 64x4 Chroma). + * Multiple superblock sizes are only valid for multi-plane YCbCr formats. */ #define AFBC_FORMAT_MOD_BLOCK_SIZE_MASK 0xf #define AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 (1ULL) #define AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 (2ULL) +#define AFBC_FORMAT_MOD_BLOCK_SIZE_64x4 (3ULL) +#define AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4 (4ULL) /* * AFBC lossless colorspace transform @@ -644,6 +652,21 @@ extern "C" { */ #define AFBC_FORMAT_MOD_SC (1ULL << 9) +/* + * AFBC double-buffer + * + * Indicates that the buffer is allocated in a layout safe for front-buffer + * rendering. + */ +#define AFBC_FORMAT_MOD_DB (1ULL << 10) + +/* + * AFBC buffer content hints + * + * Indicates that the buffer includes per-superblock content hints. + */ +#define AFBC_FORMAT_MOD_BCH (1ULL << 11) + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 08cba016cdbe4f6bf0200366194a768cfef58edd Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Fri, 18 Jan 2019 15:51:21 +0100 Subject: drm/fourcc: Add definitions for Allwinner vendor and VPU tiled format This introduces specific definitions for vendor Allwinner and its associated tiled format modifier. This modifier is used for the output format of the VPU, that can be imported directly with the display engine hardware supported by the sun4i-drm driver. Signed-off-by: Paul Kocialkowski Reviewed-by: Maxime Ripard Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190118145133.21281-12-paul.kocialkowski@bootlin.com --- include/uapi/drm/drm_fourcc.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 41106c835747..91d08a23f125 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -238,6 +238,8 @@ extern "C" { #define DRM_FORMAT_MOD_VENDOR_VIVANTE 0x06 #define DRM_FORMAT_MOD_VENDOR_BROADCOM 0x07 #define DRM_FORMAT_MOD_VENDOR_ARM 0x08 +#define DRM_FORMAT_MOD_VENDOR_ALLWINNER 0x09 + /* add more to the end as needed */ #define DRM_FORMAT_RESERVED ((1ULL << 56) - 1) @@ -667,6 +669,20 @@ extern "C" { */ #define AFBC_FORMAT_MOD_BCH (1ULL << 11) +/* + * Allwinner tiled modifier + * + * This tiling mode is implemented by the VPU found on all Allwinner platforms, + * codenamed sunxi. It is associated with a YUV format that uses either 2 or 3 + * planes. + * + * With this tiling, the luminance samples are disposed in tiles representing + * 32x32 pixels and the chrominance samples in tiles representing 32x64 pixels. + * The pixel order in each tile is linear and the tiles are disposed linearly, + * both in row-major order. + */ +#define DRM_FORMAT_MOD_ALLWINNER_TILED fourcc_mod_code(ALLWINNER, 1) + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From e46c2e99f60043bfdb63f18fe775db1f688906d9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 5 Feb 2019 09:50:31 +0000 Subject: drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to allow userspace to reconfigure the subslice configuration on a per context basis. This is required for the functional requirement of shutting down non-VME enabled sub-slices on Gen11 parts. To do so, we expose a context parameter to allow adjustment of the RPCS register stored within the context image (and currently not accessible via LRI). If the context is adjusted before first use or whilst idle, the adjustment is for "free"; otherwise if the context is active we queue a request to do so (using the kernel context), following all other activity by that context, which is also marked as barrier for all following submission against the same context. Since the overhead of device re-configuration during context switching can be significant, especially in multi-context workloads, we limit this new uAPI to only support the Gen11 VME use case. In this use case either the device is fully enabled, and exactly one slice and half of the subslices are enabled. Example usage: struct drm_i915_gem_context_param_sseu sseu = { }; struct drm_i915_gem_context_param arg = { .param = I915_CONTEXT_PARAM_SSEU, .ctx_id = gem_context_create(fd), .size = sizeof(sseu), .value = to_user_pointer(&sseu) }; /* Query device defaults. */ gem_context_get_param(fd, &arg); /* Set VME configuration on a 1x6x8 part. */ sseu.slice_mask = 0x1; sseu.subslice_mask = 0xe0; gem_context_set_param(fd, &arg); v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel) v3: Add ability to program this per engine (Chris) v4: Move most get_sseu() into i915_gem_context.c (Lionel) v5: Validate sseu configuration against the device's capabilities (Lionel) v6: Change context powergating settings through MI_SDM on kernel context (Chris) v7: Synchronize the requests following a powergating setting change using a global dependency (Chris) Iterate timelines through dev_priv.gt.active_rings (Tvrtko) Disable RPCS configuration setting for non capable users (Lionel/Tvrtko) v8: s/union intel_sseu/struct intel_sseu/ (Lionel) s/dev_priv/i915/ (Tvrtko) Change uapi class/instance fields to u16 (Tvrtko) Bump mask fields to 64bits (Lionel) Don't return EPERM when dynamic sseu is disabled (Tvrtko) v9: Import context image into kernel context's ppgtt only when reconfiguring powergated slice/subslices (Chris) Use aliasing ppgtt when needed (Michel) Tvrtko Ursulin: v10: * Update for upstream changes. * Request submit needs a RPM reference. * Reject on !FULL_PPGTT for simplicity. * Pull out get/set param to helpers for readability and less indent. * Use i915_request_await_dma_fence in add_global_barrier to skip waits on the same timeline and avoid GEM_BUG_ON. * No need to explicitly assign a NULL pointer to engine in legacy mode. * No need to move gen8_make_rpcs up. * Factored out global barrier as prep patch. * Allow to only CAP_SYS_ADMIN if !Gen11. v11: * Remove engine vfunc in favour of local helper. (Chris Wilson) * Stop retiring requests before updates since it is not needed (Chris Wilson) * Implement direct CPU update path for idle contexts. (Chris Wilson) * Left side dependency needs only be on the same context timeline. (Chris Wilson) * It is sufficient to order the timeline. (Chris Wilson) * Reject !RCS configuration attempts with -ENODEV for now. v12: * Rebase for make_rpcs. v13: * Centralize SSEU normalization to make_rpcs. * Type width checking (uAPI <-> implementation). * Gen11 restrictions uAPI checks. * Gen11 subslice count differences handling. Chris Wilson: * args->size handling fixes. * Update context image from GGTT. * Postpone context image update to pinning. * Use i915_gem_active_raw instead of last_request_on_engine. v14: * Add activity tracker on intel_context to fix the lifetime issues and simplify the code. (Chris Wilson) v15: * Fix context pin leak if no space in ring by simplifying the context pinning sequence. v16: * Rebase for context get/set param locking changes. * Just -ENODEV on !Gen11. (Joonas) v17: * Fix one Gen11 subslice enablement rule. * Handle error from i915_sw_fence_await_sw_fence_gfp. (Chris Wilson) v18: * Update commit message. (Joonas) * Restrict uAPI to VME use case. (Joonas) v19: * Rebase. v20: * Rebase for ce->active_tracker. v21: * Rebase for IS_GEN changes. v22: * Reserve uAPI for flags straight away. (Chris Wilson) v23: * Rebase for RUNTIME_INFO. v24: * Added some headline docs for the uapi usage. (Joonas/Chris) v25: * Renamed class/instance to engine_class/engine_instance to avoid clash with C++ keyword. (Tony Ye) v26: * Rebased for runtime pm api changes. v27: * Rebased for intel_context_init. * Wrap commit msg to 75. v28: (Chris Wilson) * Use i915_gem_ggtt. * Use i915_request_await_dma_fence to show a better example. v29: * i915_timeline_set_barrier can now fail. (Chris Wilson) v30: * Capture some acks. v31: * Drop the WARN_ON from use controllable paths. (Chris Wilson) * Use overflows_type for all checks. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107634 Issue: https://github.com/intel/media-driver/issues/267 Signed-off-by: Chris Wilson Signed-off-by: Lionel Landwerlin Cc: Dmitry Rogozhkin Cc: Tvrtko Ursulin Cc: Zhipeng Gong Cc: Joonas Lahtinen Cc: Tony Ye Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen Acked-by: Timo Aaltonen Acked-by: Takashi Iwai Acked-by: Stéphane Marchesin Link: https://patchwork.freedesktop.org/patch/msgid/20190205095032.22673-4-tvrtko.ursulin@linux.intel.com --- include/uapi/drm/i915_drm.h | 64 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 298b2e197744..397810fa2d33 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1486,9 +1486,73 @@ struct drm_i915_gem_context_param { #define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */ #define I915_CONTEXT_DEFAULT_PRIORITY 0 #define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */ + /* + * When using the following param, value should be a pointer to + * drm_i915_gem_context_param_sseu. + */ +#define I915_CONTEXT_PARAM_SSEU 0x7 __u64 value; }; +/** + * Context SSEU programming + * + * It may be necessary for either functional or performance reason to configure + * a context to run with a reduced number of SSEU (where SSEU stands for Slice/ + * Sub-slice/EU). + * + * This is done by configuring SSEU configuration using the below + * @struct drm_i915_gem_context_param_sseu for every supported engine which + * userspace intends to use. + * + * Not all GPUs or engines support this functionality in which case an error + * code -ENODEV will be returned. + * + * Also, flexibility of possible SSEU configuration permutations varies between + * GPU generations and software imposed limitations. Requesting such a + * combination will return an error code of -EINVAL. + * + * NOTE: When perf/OA is active the context's SSEU configuration is ignored in + * favour of a single global setting. + */ +struct drm_i915_gem_context_param_sseu { + /* + * Engine class & instance to be configured or queried. + */ + __u16 engine_class; + __u16 engine_instance; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * Mask of slices to enable for the context. Valid values are a subset + * of the bitmask value returned for I915_PARAM_SLICE_MASK. + */ + __u64 slice_mask; + + /* + * Mask of subslices to enable for the context. Valid values are a + * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK. + */ + __u64 subslice_mask; + + /* + * Minimum/Maximum number of EUs to enable per subslice for the + * context. min_eus_per_subslice must be inferior or equal to + * max_eus_per_subslice. + */ + __u16 min_eus_per_subslice; + __u16 max_eus_per_subslice; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 rsvd; +}; + enum drm_i915_oa_format { I915_OA_FORMAT_A13 = 1, /* HSW only */ I915_OA_FORMAT_A29, /* HSW only */ -- cgit v1.2.3 From 67dd1a36334ffce82bebeb2d633e152aa436d370 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Thu, 31 Jan 2019 15:44:22 -0500 Subject: drm/amdgpu: Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New chunk for dependency on start of job's execution instead on the end. This is used for GPU deadlock prevention when userspace uses mid-IB fences to wait for mid-IB work on other rings. v2: Fix typo in AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES v3: Bump KMS version v4: put old fence AFTER acquiring the scheduled fence. Signed-off-by: Andrey Grodzovsky Suggested-by: Christian Koenig Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index be84e43c1e19..47296f2ec3fa 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -523,6 +523,7 @@ struct drm_amdgpu_gem_va { #define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04 #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 #define AMDGPU_CHUNK_ID_BO_HANDLES 0x06 +#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07 struct drm_amdgpu_cs_chunk { __u32 chunk_id; -- cgit v1.2.3 From 41cca166cc57e75e94d888595a428d23a3bf4e36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 21 Jan 2019 17:22:55 -0500 Subject: drm/amdgpu: add a workaround for GDS ordered append hangs with compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm not increasing the DRM version because GDS isn't totally without bugs yet. v2: update emit_ib_size Signed-off-by: Marek Olšák Acked-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 47296f2ec3fa..2acbc8bc465b 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -566,6 +566,11 @@ union drm_amdgpu_cs { * caches (L2/vL1/sL1/I$). */ #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) +/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER. + * This will reset wave ID counters for the IB. + */ +#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) + struct drm_amdgpu_cs_chunk_ib { __u32 _pad; /** AMDGPU_IB_FLAG_* */ -- cgit v1.2.3 From 05f8bc82fc428dbaf41764e95167dd759769f33d Mon Sep 17 00:00:00 2001 From: Randy Li Date: Thu, 10 Jan 2019 03:57:09 +0800 Subject: drm/fourcc: Add new P010, P016 video format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P010 is a planar 4:2:0 YUV with interleaved UV plane, 10 bits per channel video format. P012 is a planar 4:2:0 YUV 12 bits per channel P016 is a planar 4:2:0 YUV with interleaved UV plane, 16 bits per channel video format. V3: Added P012 and fixed cpp for P010. V4: format definition refined per review. V5: Format comment block for each new pixel format. V6: reversed Cb/Cr order in comments. v7: reversed Cb/Cr order in comments of header files, remove the wrong part of commit message. V8: reversed V7 changes except commit message and rebased. v9: used the new properties to describe those format and rebased. Cc: Daniel Stone Cc: Ville Syrjälä Signed-off-by: Randy Li Signed-off-by: Clint Taylor Reviewed-by: Ayan Kumar Halder Reviewed-by: Neil Armstrong Reviewed-by: Daniel Vetter Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20190109195710.28501-2-ayaka@soulik.info --- include/uapi/drm/drm_fourcc.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 91d08a23f125..0fa360223255 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -195,6 +195,27 @@ extern "C" { #define DRM_FORMAT_NV24 fourcc_code('N', 'V', '2', '4') /* non-subsampled Cr:Cb plane */ #define DRM_FORMAT_NV42 fourcc_code('N', 'V', '4', '2') /* non-subsampled Cb:Cr plane */ +/* + * 2 plane YCbCr MSB aligned + * index 0 = Y plane, [15:0] Y:x [10:6] little endian + * index 1 = Cr:Cb plane, [31:0] Cr:x:Cb:x [10:6:10:6] little endian + */ +#define DRM_FORMAT_P010 fourcc_code('P', '0', '1', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel */ + +/* + * 2 plane YCbCr MSB aligned + * index 0 = Y plane, [15:0] Y:x [12:4] little endian + * index 1 = Cr:Cb plane, [31:0] Cr:x:Cb:x [12:4:12:4] little endian + */ +#define DRM_FORMAT_P012 fourcc_code('P', '0', '1', '2') /* 2x2 subsampled Cr:Cb plane 12 bits per channel */ + +/* + * 2 plane YCbCr MSB aligned + * index 0 = Y plane, [15:0] Y little endian + * index 1 = Cr:Cb plane, [31:0] Cr:Cb [16:16] little endian + */ +#define DRM_FORMAT_P016 fourcc_code('P', '0', '1', '6') /* 2x2 subsampled Cr:Cb plane 16 bits per channel */ + /* * 3 plane YCbCr * index 0: Y plane, [7:0] Y -- cgit v1.2.3 From b5bb37eddb63b16b7ab959598d108b1c444be77d Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Wed, 30 Jan 2019 02:53:22 +0100 Subject: drm/amdgpu: Add command to override the context priority. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Given a master fd we can then override the priority of the context in another fd. Using these overrides was recommended by Christian instead of trying to submit from a master fd, and I am adding a way to override a single context instead of the entire process so we can only upgrade a single Vulkan queue and not effectively the entire process. Reused the flags field as it was checked to be 0 anyways, so nothing used it. This is source-incompatible (due to the name change), but ABI compatible. Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 2acbc8bc465b..4a53f6cfa034 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -272,13 +272,14 @@ union drm_amdgpu_vm { /* sched ioctl */ #define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1 +#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE 2 struct drm_amdgpu_sched_in { /* AMDGPU_SCHED_OP_* */ __u32 op; __u32 fd; __s32 priority; - __u32 flags; + __u32 ctx_id; }; union drm_amdgpu_sched { -- cgit v1.2.3 From eeaf06ac1a5584e41cf289f8351e446bb131374b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 5 Jul 2018 12:57:12 +1000 Subject: drm/nouveau/svm: initial support for shared virtual memory This uses HMM to mirror a process' CPU page tables into a channel's page tables, and keep them synchronised so that both the CPU and GPU are able to access the same memory at the same virtual address. While this code also supports Volta/Turing, it's only enabled for Pascal GPUs currently due to channel recovery being unreliable right now on the later GPUs. Signed-off-by: Ben Skeggs --- include/uapi/drm/nouveau_drm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index 259588a4b61b..afac182c80d8 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -133,12 +133,20 @@ struct drm_nouveau_gem_cpu_fini { #define DRM_NOUVEAU_NOTIFIEROBJ_ALLOC 0x05 /* deprecated */ #define DRM_NOUVEAU_GPUOBJ_FREE 0x06 /* deprecated */ #define DRM_NOUVEAU_NVIF 0x07 +#define DRM_NOUVEAU_SVM_INIT 0x08 #define DRM_NOUVEAU_GEM_NEW 0x40 #define DRM_NOUVEAU_GEM_PUSHBUF 0x41 #define DRM_NOUVEAU_GEM_CPU_PREP 0x42 #define DRM_NOUVEAU_GEM_CPU_FINI 0x43 #define DRM_NOUVEAU_GEM_INFO 0x44 +struct drm_nouveau_svm_init { + __u64 unmanaged_addr; + __u64 unmanaged_size; +}; + +#define DRM_IOCTL_NOUVEAU_SVM_INIT DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_INIT, struct drm_nouveau_svm_init) + #define DRM_IOCTL_NOUVEAU_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new) #define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, struct drm_nouveau_gem_pushbuf) #define DRM_IOCTL_NOUVEAU_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_PREP, struct drm_nouveau_gem_cpu_prep) -- cgit v1.2.3 From f180bf12ac061f093abb9247505f661817973cae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Tue, 7 Aug 2018 16:13:16 -0400 Subject: drm/nouveau/svm: new ioctl to migrate process memory to GPU memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This add an ioctl to migrate a range of process address space to the device memory. On platform without cache coherent bus (x86, ARM, ...) this means that CPU can not access that range directly, instead CPU will fault which will migrate the memory back to system memory. This is behind a staging flag so that we can evolve the API. Signed-off-by: Jérôme Glisse --- include/uapi/drm/nouveau_drm.h | 43 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index afac182c80d8..9459a6e3bc1f 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -134,6 +134,7 @@ struct drm_nouveau_gem_cpu_fini { #define DRM_NOUVEAU_GPUOBJ_FREE 0x06 /* deprecated */ #define DRM_NOUVEAU_NVIF 0x07 #define DRM_NOUVEAU_SVM_INIT 0x08 +#define DRM_NOUVEAU_SVM_BIND 0x09 #define DRM_NOUVEAU_GEM_NEW 0x40 #define DRM_NOUVEAU_GEM_PUSHBUF 0x41 #define DRM_NOUVEAU_GEM_CPU_PREP 0x42 @@ -145,7 +146,49 @@ struct drm_nouveau_svm_init { __u64 unmanaged_size; }; +struct drm_nouveau_svm_bind { + __u64 header; + __u64 va_start; + __u64 va_end; + __u64 npages; + __u64 stride; + __u64 result; + __u64 reserved0; + __u64 reserved1; +}; + +#define NOUVEAU_SVM_BIND_COMMAND_SHIFT 0 +#define NOUVEAU_SVM_BIND_COMMAND_BITS 8 +#define NOUVEAU_SVM_BIND_COMMAND_MASK ((1 << 8) - 1) +#define NOUVEAU_SVM_BIND_PRIORITY_SHIFT 8 +#define NOUVEAU_SVM_BIND_PRIORITY_BITS 8 +#define NOUVEAU_SVM_BIND_PRIORITY_MASK ((1 << 8) - 1) +#define NOUVEAU_SVM_BIND_TARGET_SHIFT 16 +#define NOUVEAU_SVM_BIND_TARGET_BITS 32 +#define NOUVEAU_SVM_BIND_TARGET_MASK 0xffffffff + +/* + * Below is use to validate ioctl argument, userspace can also use it to make + * sure that no bit are set beyond known fields for a given kernel version. + */ +#define NOUVEAU_SVM_BIND_VALID_BITS 48 +#define NOUVEAU_SVM_BIND_VALID_MASK ((1ULL << NOUVEAU_SVM_BIND_VALID_BITS) - 1) + + +/* + * NOUVEAU_BIND_COMMAND__MIGRATE: synchronous migrate to target memory. + * result: number of page successfuly migrate to the target memory. + */ +#define NOUVEAU_SVM_BIND_COMMAND__MIGRATE 0 + +/* + * NOUVEAU_SVM_BIND_HEADER_TARGET__GPU_VRAM: target the GPU VRAM memory. + */ +#define NOUVEAU_SVM_BIND_TARGET__GPU_VRAM (1UL << 31) + + #define DRM_IOCTL_NOUVEAU_SVM_INIT DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_INIT, struct drm_nouveau_svm_init) +#define DRM_IOCTL_NOUVEAU_SVM_BIND DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_BIND, struct drm_nouveau_svm_bind) #define DRM_IOCTL_NOUVEAU_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new) #define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, struct drm_nouveau_gem_pushbuf) -- cgit v1.2.3