From 11ecbdddf2f8b6cc2480aff6d877b7a4076e3b7f Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 17 Mar 2020 15:22:22 +0200 Subject: drm/i915/perf: introduce global sseu pinning On Gen11 powergating half the execution units is a functional requirement when using the VME samplers. Not fullfilling this requirement can lead to hangs. This unfortunately plays fairly poorly with the NOA requirements. NOA requires a stable power configuration to maintain its configuration. As a result using OA (and NOA feeding into it) so far has required us to use a power configuration that can work for all contexts. The only power configuration fullfilling this is powergating half the execution units. This makes performance analysis for 3D workloads somewhat pointless. Failing to find a solution that would work for everybody, this change introduces a new i915-perf stream open parameter that punts the decision off to userspace. If this parameter is omitted, the existing Gen11 behavior remains (half EU array powergating). This change takes the initiative to move all perf related sseu configuration into i915_perf.c v2: Make parameter priviliged if different from default v3: Fix context modifying its sseu config while i915-perf is enabled v4: Always consider global sseu a privileged operation (Tvrtko) Override req_sseu point in intel_sseu_make_rpcs() (Tvrtko) Remove unrelated changes (Tvrtko) v5: Some typos (Tvrtko) Process sseu param in read_properties_unlocked() (Tvrtko) v6: Actually commit the bits from v5... Fixup some checkpath warnings v7: Only compare engine uabi field (Chris) Signed-off-by: Lionel Landwerlin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200317132222.2638719-3-lionel.g.landwerlin@intel.com --- include/uapi/drm/i915_drm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 2813e579b480..db649d03ab52 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1969,6 +1969,17 @@ enum drm_i915_perf_property_id { */ DRM_I915_PERF_PROP_HOLD_PREEMPTION, + /** + * Specifying this pins all contexts to the specified SSEU power + * configuration for the duration of the recording. + * + * This parameter's value is a pointer to a struct + * drm_i915_gem_context_param_sseu. + * + * This property is available in perf revision 4. + */ + DRM_I915_PERF_PROP_GLOBAL_SSEU, + DRM_I915_PERF_PROP_MAX /* non-ABI */ }; -- cgit v1.2.3 From 4ef10fe05ba0b08ce7029c07878afe3c8d5754d8 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 24 Mar 2020 11:54:57 -0700 Subject: drm/i915/perf: add new open param to configure polling of OA buffer This new parameter let's the application choose how often the OA buffer should be checked on the CPU side for data availability. Longer polling period tend to reduce CPU overhead if the application does not care about somewhat real time data collection. v2: Allow disabling polling completely with 0 value (Lionel) v3: Version the new parameter (Joonas) v4: Rebase (Umesh) v5: Make poll delay value of 0 invalid (Umesh) v6: - Describe poll_oa_period (Ashutosh) - Fix comment for new poll parameter (Lionel) - Drop open_flags in read_properties_unlocked (Lionel) - Rename uapi parameter (Ashutosh) v7: Reword the comment in uapi (Ashutosh) Signed-off-by: Lionel Landwerlin Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Signed-off-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20200324185457.14635-4-umesh.nerlige.ramappa@intel.com --- include/uapi/drm/i915_drm.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index db649d03ab52..14b67cd6b54b 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1980,6 +1980,19 @@ enum drm_i915_perf_property_id { */ DRM_I915_PERF_PROP_GLOBAL_SSEU, + /** + * This optional parameter specifies the timer interval in nanoseconds + * at which the i915 driver will check the OA buffer for available data. + * Minimum allowed value is 100 microseconds. A default value is used by + * the driver if this parameter is not specified. Note that larger timer + * values will reduce cpu consumption during OA perf captures. However, + * excessively large values would potentially result in OA buffer + * overwrites as captures reach end of the OA buffer. + * + * This property is available in perf revision 5. + */ + DRM_I915_PERF_PROP_POLL_OA_PERIOD, + DRM_I915_PERF_PROP_MAX /* non-ABI */ }; -- cgit v1.2.3 From c57053725d9bd7ab3e14fd0f8001714041df1280 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 17 Apr 2020 20:50:30 -0400 Subject: drm/amdgpu: add tiling flags from Mesa MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DCC_INDEPENDENT_128B is needed for displayble DCC on gfx10. SCANOUT is not needed by the kernel, but Mesa uses it. Signed-off-by: Marek Olšák Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 65f69723cbdc..d28b4ce744d5 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -346,6 +346,10 @@ struct drm_amdgpu_gem_userptr { #define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF #define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 43 #define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1 +#define AMDGPU_TILING_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_SCANOUT_MASK 0x1 /* Set/Get helpers for tiling flags. */ #define AMDGPU_TILING_SET(field, value) \ -- cgit v1.2.3 From 35ce006004821c5e9ae8fe03d048567cec99c41e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 7 Aug 2019 21:43:24 -0500 Subject: drm/amdgpu: add UAPI for creating encrypted buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a flag to the GEM_CREATE ioctl to create encrypted buffers. Buffers with this flag set will be created with the TMZ bit set in the PTEs or engines accessing them. This is required in order to properly access the data from the engines. Signed-off-by: Alex Deucher Reviewed-by: Huang Rui Reviewed-by: Christian König --- include/uapi/drm/amdgpu_drm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index d28b4ce744d5..4a0829d35e72 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -133,6 +133,11 @@ extern "C" { * releasing the memory */ #define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE (1 << 9) +/* Flag that BO will be encrypted and that the TMZ bit should be + * set in the PTEs when mapping this buffer via GPUVM or + * accessing it with various hw blocks + */ +#define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- cgit v1.2.3 From e90c2b210bad457aab73d3357465afe4d4475f7e Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Mon, 23 Sep 2019 19:02:41 -0400 Subject: drm/amdgpu: add UAPI to create secure commands (v3) Add a flag to the command submission IOCTL structure which when present indicates that this command submission should be treated as secure. The kernel driver uses this flag to determine whether the engine should be transitioned to secure or unsecure, or the work can be submitted to a secure queue depending on the IP. v3: the flag is now at command submission IOCTL Signed-off-by: Luben Tuikov Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4a0829d35e72..34cd0bae06bc 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -205,6 +205,9 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_QUERY_STATE 3 #define AMDGPU_CTX_OP_QUERY_STATE2 4 +/* Flag the command submission as secure */ +#define AMDGPU_CS_FLAGS_SECURE (1 << 0) + /* GPU reset status */ #define AMDGPU_CTX_NO_RESET 0 /* this the context caused it */ @@ -564,7 +567,7 @@ struct drm_amdgpu_cs_in { /** Handle of resource list associated with CS */ __u32 bo_list_handle; __u32 num_chunks; - __u32 _pad; + __u32 flags; /** this points to __u64 * which point to cs chunks */ __u64 chunks; }; -- cgit v1.2.3 From 4baa8ff0690e464443594353d63c989f0ed9f831 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 27 Nov 2019 15:55:35 -0500 Subject: drm/amdgpu: move CS secure flag next the structs where it's used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So it's not mixed up with the CTX stuff. Reviewed-by: Zhan Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 34cd0bae06bc..bea72eb8c147 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -205,9 +205,6 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_QUERY_STATE 3 #define AMDGPU_CTX_OP_QUERY_STATE2 4 -/* Flag the command submission as secure */ -#define AMDGPU_CS_FLAGS_SECURE (1 << 0) - /* GPU reset status */ #define AMDGPU_CTX_NO_RESET 0 /* this the context caused it */ @@ -561,6 +558,9 @@ struct drm_amdgpu_cs_chunk { __u64 chunk_data; }; +/* Flag the command submission as secure */ +#define AMDGPU_CS_FLAGS_SECURE (1 << 0) + struct drm_amdgpu_cs_in { /** Rendering context id */ __u32 ctx_id; -- cgit v1.2.3 From 0bb5d5b03f78aeb5f87d47877eb15532875c64da Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Wed, 22 Apr 2020 17:56:56 -0400 Subject: drm/amdgpu: Move to a per-IB secure flag (TMZ) Move from a per-CS secure flag (TMZ) to a per-IB secure flag. Signed-off-by: Luben Tuikov Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index bea72eb8c147..e01b673f0449 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -558,9 +558,6 @@ struct drm_amdgpu_cs_chunk { __u64 chunk_data; }; -/* Flag the command submission as secure */ -#define AMDGPU_CS_FLAGS_SECURE (1 << 0) - struct drm_amdgpu_cs_in { /** Rendering context id */ __u32 ctx_id; @@ -601,6 +598,10 @@ union drm_amdgpu_cs { */ #define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) +/* Flag the IB as secure (TMZ) + */ +#define AMDGPU_IB_FLAGS_SECURE (1 << 5) + struct drm_amdgpu_cs_chunk_ib { __u32 _pad; /** AMDGPU_IB_FLAG_* */ -- cgit v1.2.3 From 5bb4b78be9c67b02a7f138850e9e89825181f555 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 6 May 2019 22:11:14 -0500 Subject: drm/amdkfd: New IOCTL to allocate queue GWS (v2) Add a new kfd ioctl to allocate queue GWS. Queue GWS is released on queue destroy. v2: re-introduce this API with the following fixes squashed in: - drm/amdkfd: fix null pointer dereference on dev - drm/amdkfd: Return proper error code for gws alloc API - drm/amdkfd: Remove GPU ID in GWS queue creation Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 20917c59f39c..4f6676428c5c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -410,6 +410,20 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { __u32 n_success; /* to/from KFD */ }; +/* Allocate GWS for specific queue + * + * @queue_id: queue's id that GWS is allocated for + * @num_gws: how many GWS to allocate + * @first_gws: index of the first GWS allocated. + * only support contiguous GWS allocation + */ +struct kfd_ioctl_alloc_queue_gws_args { + __u32 queue_id; /* to KFD */ + __u32 num_gws; /* to KFD */ + __u32 first_gws; /* from KFD */ + __u32 pad; +}; + struct kfd_ioctl_get_dmabuf_info_args { __u64 size; /* from KFD */ __u64 metadata_ptr; /* to KFD */ @@ -529,7 +543,10 @@ enum kfd_mmio_remap { #define AMDKFD_IOC_IMPORT_DMABUF \ AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) +#define AMDKFD_IOC_ALLOC_QUEUE_GWS \ + AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1E +#define AMDKFD_COMMAND_END 0x1F #endif -- cgit v1.2.3 From 0aeaaf64e6d06e353de15dcf9973312ae0672ca1 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 29 Apr 2020 19:36:06 -0400 Subject: drm/amdkfd: Fix comment formatting Corrected two function names. Added a missing space. Signed-off-by: Felix Kuehling Reviewed-by: Kent Russell Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 4f6676428c5c..b6be62356d34 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -251,7 +251,7 @@ struct kfd_memory_exception_failure { __u32 imprecise; /* Can't determine the exact fault address */ }; -/* memory exception data*/ +/* memory exception data */ struct kfd_hsa_memory_exception_data { struct kfd_memory_exception_failure failure; __u64 va; -- cgit v1.2.3 From 50b6f619a099af6dfe06e958bfa08d46440ea788 Mon Sep 17 00:00:00 2001 From: Mika Kahola Date: Wed, 6 May 2020 15:08:27 +0300 Subject: uapi/drm/drm_fourcc.h: Note on platform specificity for format modifiers Make an additional note on DRM format modifiers for x and y tiling. These format modifiers are defined for BDW+ platforms and therefore definition is not valid for older gens. This is due to address swizzling for tiled surfaces is no longer used. For newer platforms main memory controller has a more effective address swizzling algorithm. v2: Rephrase comment (Daniel) Signed-off-by: Mika Kahola Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200506120827.12250-1-mika.kahola@intel.com --- include/uapi/drm/drm_fourcc.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 8bc0b31597d8..9e488d10f8b4 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -354,9 +354,12 @@ extern "C" { * a platform-dependent stride. On top of that the memory can apply * platform-depending swizzling of some higher address bits into bit6. * - * This format is highly platforms specific and not useful for cross-driver - * sharing. It exists since on a given platform it does uniquely identify the - * layout in a simple way for i915-specific userspace. + * Note that this layout is only accurate on intel gen 8+ or valleyview chipsets. + * On earlier platforms the is highly platforms specific and not useful for + * cross-driver sharing. It exists since on a given platform it does uniquely + * identify the layout in a simple way for i915-specific userspace, which + * facilitated conversion of userspace to modifiers. Additionally the exact + * format on some really old platforms is not known. */ #define I915_FORMAT_MOD_X_TILED fourcc_mod_code(INTEL, 1) @@ -369,9 +372,12 @@ extern "C" { * memory can apply platform-depending swizzling of some higher address bits * into bit6. * - * This format is highly platforms specific and not useful for cross-driver - * sharing. It exists since on a given platform it does uniquely identify the - * layout in a simple way for i915-specific userspace. + * Note that this layout is only accurate on intel gen 8+ or valleyview chipsets. + * On earlier platforms the is highly platforms specific and not useful for + * cross-driver sharing. It exists since on a given platform it does uniquely + * identify the layout in a simple way for i915-specific userspace, which + * facilitated conversion of userspace to modifiers. Additionally the exact + * format on some really old platforms is not known. */ #define I915_FORMAT_MOD_Y_TILED fourcc_mod_code(INTEL, 2) -- cgit v1.2.3 From 43c8546bcd854806736d8a635a0d696504dd4c21 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 28 Apr 2020 01:28:43 -0400 Subject: drm/amdgpu: Add a UAPI flag for user to call mem_sync When this flag is set in the CS IB flags, it causes a memory cache flush of the GFX. v2: Move new flag to drm_amdgpu_cs_chunk_ib.flags Bump up UAPI version Remove condition on job != null to emit mem_sync Signed-off-by: Andrey Grodzovsky Reviewed-by: Luben Tuikov Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index e01b673f0449..4e873dcbe68f 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -602,6 +602,10 @@ union drm_amdgpu_cs { */ #define AMDGPU_IB_FLAGS_SECURE (1 << 5) +/* Tell KMD to flush and invalidate caches + */ +#define AMDGPU_IB_FLAG_EMIT_MEM_SYNC (1 << 6) + struct drm_amdgpu_cs_chunk_ib { __u32 _pad; /** AMDGPU_IB_FLAG_* */ -- cgit v1.2.3 From 82c8c4ddcae74460f4ea484ab9ad97ddb466e469 Mon Sep 17 00:00:00 2001 From: James Jones Date: Wed, 11 Dec 2019 12:55:47 -0800 Subject: drm: Generalized NV Block Linear DRM format mod Builds upon the existing NVIDIA 16Bx2 block linear format modifiers by adding more "fields" to the existing parameterized DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK format modifier macro that allow fully defining a unique-across- all-NVIDIA-hardware bit layout using a minimal set of fields and values. The new modifier macro DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D is effectively backwards compatible with the existing macro, introducing a superset of the previously definable format modifiers. Backwards compatibility has two quirks. First, the zero value for the "kind" field, which is implied by the DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK macro, must be special cased in drivers and assumed to map to the pre-Turing generic kind of 0xfe, since a kind of "zero" is reserved for linear buffer layouts on all GPUs. Second, it is assumed backwards compatibility is only needed when running on Tegra GPUs, and specifically Tegra GPUs prior to Xavier. This is based on two assertions: -Tegra GPUs prior to Xavier used a slightly different raw bit layout than desktop GPUs, making it impossible to directly share block linear buffers between the two. -Support for the existing block linear modifiers was incomplete, making them useful only for exporting buffers created by nouveau and importing them to Tegra DRM as framebuffers for scan out. There was no support for adding framebuffers using format modifiers in nouveau, nor importing dma-buf/PRIME GEM objects into nouveau userspace drivers with modifiers in Mesa. Hence it is assumed the prior modifiers were not intended for use on desktop GPUs, and as a corollary, were not intended to support sharing block linear buffers across two different NVIDIA GPUs. v2: - Added canonicalize helper function v3: - Added additional bit to compression field to support Tesla (NV5x,G8x,G9x,GT1xx,GT2xx) class chips. Signed-off-by: James Jones Signed-off-by: Ben Skeggs --- include/uapi/drm/drm_fourcc.h | 122 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 114 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 9e488d10f8b4..490143500a50 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -527,7 +527,113 @@ extern "C" { #define DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED fourcc_mod_code(NVIDIA, 1) /* - * 16Bx2 Block Linear layout, used by desktop GPUs, and Tegra K1 and later + * Generalized Block Linear layout, used by desktop GPUs starting with NV50/G80, + * and Tegra GPUs starting with Tegra K1. + * + * Pixels are arranged in Groups of Bytes (GOBs). GOB size and layout varies + * based on the architecture generation. GOBs themselves are then arranged in + * 3D blocks, with the block dimensions (in terms of GOBs) always being a power + * of two, and hence expressible as their log2 equivalent (E.g., "2" represents + * a block depth or height of "4"). + * + * Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format + * in full detail. + * + * Macro + * Bits Param Description + * ---- ----- ----------------------------------------------------------------- + * + * 3:0 h log2(height) of each block, in GOBs. Placed here for + * compatibility with the existing + * DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers. + * + * 4:4 - Must be 1, to indicate block-linear layout. Necessary for + * compatibility with the existing + * DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers. + * + * 8:5 - Reserved (To support 3D-surfaces with variable log2(depth) block + * size). Must be zero. + * + * Note there is no log2(width) parameter. Some portions of the + * hardware support a block width of two gobs, but it is impractical + * to use due to lack of support elsewhere, and has no known + * benefits. + * + * 11:9 - Reserved (To support 2D-array textures with variable array stride + * in blocks, specified via log2(tile width in blocks)). Must be + * zero. + * + * 19:12 k Page Kind. This value directly maps to a field in the page + * tables of all GPUs >= NV50. It affects the exact layout of bits + * in memory and can be derived from the tuple + * + * (format, GPU model, compression type, samples per pixel) + * + * Where compression type is defined below. If GPU model were + * implied by the format modifier, format, or memory buffer, page + * kind would not need to be included in the modifier itself, but + * since the modifier should define the layout of the associated + * memory buffer independent from any device or other context, it + * must be included here. + * + * 21:20 g GOB Height and Page Kind Generation. The height of a GOB changed + * starting with Fermi GPUs. Additionally, the mapping between page + * kind and bit layout has changed at various points. + * + * 0 = Gob Height 8, Fermi - Volta, Tegra K1+ Page Kind mapping + * 1 = Gob Height 4, G80 - GT2XX Page Kind mapping + * 2 = Gob Height 8, Turing+ Page Kind mapping + * 3 = Reserved for future use. + * + * 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a further + * bit remapping step that occurs at an even lower level than the + * page kind and block linear swizzles. This causes the layout of + * surfaces mapped in those SOC's GPUs to be incompatible with the + * equivalent mapping on other GPUs in the same system. + * + * 0 = Tegra K1 - Tegra Parker/TX2 Layout. + * 1 = Desktop GPU and Tegra Xavier+ Layout + * + * 25:23 c Lossless Framebuffer Compression type. + * + * 0 = none + * 1 = ROP/3D, layout 1, exact compression format implied by Page + * Kind field + * 2 = ROP/3D, layout 2, exact compression format implied by Page + * Kind field + * 3 = CDE horizontal + * 4 = CDE vertical + * 5 = Reserved for future use + * 6 = Reserved for future use + * 7 = Reserved for future use + * + * 55:25 - Reserved for future use. Must be zero. + */ +#define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \ + fourcc_mod_code(NVIDIA, (0x10 | \ + ((h) & 0xf) | \ + (((k) & 0xff) << 12) | \ + (((g) & 0x3) << 20) | \ + (((s) & 0x1) << 22) | \ + (((c) & 0x7) << 23))) + +/* To grandfather in prior block linear format modifiers to the above layout, + * the page kind "0", which corresponds to "pitch/linear" and hence is unusable + * with block-linear layouts, is remapped within drivers to the value 0xfe, + * which corresponds to the "generic" kind used for simple single-sample + * uncompressed color formats on Fermi - Volta GPUs. + */ +static inline __u64 +drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) +{ + if (!(modifier & 0x10) || (modifier & (0xff << 12))) + return modifier; + else + return modifier | (0xfe << 12); +} + +/* + * 16Bx2 Block Linear layout, used by Tegra K1 and later * * Pixels are arranged in 64x8 Groups Of Bytes (GOBs). GOBs are then stacked * vertically by a power of 2 (1 to 32 GOBs) to form a block. @@ -548,20 +654,20 @@ extern "C" { * in full detail. */ #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(v) \ - fourcc_mod_code(NVIDIA, 0x10 | ((v) & 0xf)) + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 0, 0, 0, (v)) #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_ONE_GOB \ - fourcc_mod_code(NVIDIA, 0x10) + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0) #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_TWO_GOB \ - fourcc_mod_code(NVIDIA, 0x11) + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1) #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_FOUR_GOB \ - fourcc_mod_code(NVIDIA, 0x12) + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2) #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_EIGHT_GOB \ - fourcc_mod_code(NVIDIA, 0x13) + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3) #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_SIXTEEN_GOB \ - fourcc_mod_code(NVIDIA, 0x14) + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4) #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_THIRTYTWO_GOB \ - fourcc_mod_code(NVIDIA, 0x15) + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5) /* * Some Broadcom modifiers take parameters, for example the number of -- cgit v1.2.3