From 5cfd978490d82486fc3286a72009fb3229760555 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 5 Dec 2022 14:43:00 +0800 Subject: drm/amdgpu: expose peak profiling mode shader/memory clocks Expose this information to UMDs that need it, as is already done for the standard profiling mode. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4038abe8505a..8c5d05384767 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -832,6 +832,10 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK 0x8 /* Subquery id: Query GPU stable pstate memory clock */ #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK 0x9 + /* Subquery id: Query GPU peak pstate shader clock */ + #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK 0xa + /* Subquery id: Query GPU peak pstate memory clock */ + #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK 0xb /* Number of VRAM page faults on CPU access. */ #define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E #define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F -- cgit v1.2.3 From 88347fa18bead86949c45229faaa2c66177c62fb Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 5 Dec 2022 10:09:38 +0800 Subject: drm/amdgpu: expose the minimum shader/memory clock frequency Otherwise, some UMD tools will treat them as 0 by default when they actually are not.
Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 8c5d05384767..fe7f871e3080 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1111,6 +1111,8 @@ struct drm_amdgpu_info_device { __u32 pa_sc_tile_steering_override; /* disabled TCCs */ __u64 tcc_disabled_mask; + __u64 min_engine_clock; + __u64 min_memory_clock; }; struct drm_amdgpu_info_hw_ip { -- cgit v1.2.3 From b357e7ac1b7349befaeded273b775c7af23a538b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 23 Nov 2022 20:24:37 +0100 Subject: drm/fourcc: Document open source user waiver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's a bit a FAQ, and we really can't claim to be the authoritative source for allocating these numbers used in many standard extensions if we tell closed source or vendor stacks in general to go away. Iirc this was already clarified in some vulkan discussions, but I can't find that anywhere anymore. At least not in a public link. 
Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: Alex Deucher Cc: Daniel Stone Cc: Bas Nieuwenhuizen Cc: Jason Ekstrand Cc: Neil Trevett Acked-by: Daniel Stone Acked-by: Maxime Ripard Acked-by: David Airlie Acked-by: Marek Olšák Acked-by: Bas Nieuwenhuizen Acked-by: Jason Ekstrand Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20221123192437.1065826-1-daniel.vetter@ffwll.ch --- include/uapi/drm/drm_fourcc.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index bc056f2d537d..de703c6be969 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -88,6 +88,18 @@ extern "C" { * * The authoritative list of format modifier codes is found in * `include/uapi/drm/drm_fourcc.h` + * + * Open Source User Waiver + * ----------------------- + * + * Because this is the authoritative source for pixel formats and modifiers + * referenced by GL, Vulkan extensions and other standards and hence used both + * by open source and closed source driver stacks, the usual requirement for an + * upstream in-kernel or open source userspace user does not apply. + * + * To ensure, as much as feasible, compatibility across stacks and avoid + * confusion with incompatible enumerations stakeholders for all relevant driver + * stacks should approve additions. */ #define fourcc_code(a, b, c, d) ((__u32)(a) | ((__u32)(b) << 8) | \ -- cgit v1.2.3 From cab18866feade5ffa0cadc5e632528b2050e8e28 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Sat, 3 Dec 2022 18:22:52 +0800 Subject: drm: Remove the obsolete driver-i810 Commit 399516ab0fee ("MAINTAINERS: Add a bunch of legacy (UMS) DRM drivers") marked i810 driver obsolete 7 years ago. 
And the mesa UMD of this drm driver already in deprecated list in the link: https://docs.mesa3d.org/systems.html Intel i810-->driver/gpu/drm/i810 It's time to remove this driver. Signed-off-by: Cai Huoqing Acked-by: Daniel Vetter Acked-by: Dave Airlie Acked-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20221203102502.3185-2-cai.huoqing@linux.dev --- include/uapi/drm/i810_drm.h | 292 -------------------------------------------- 1 file changed, 292 deletions(-) delete mode 100644 include/uapi/drm/i810_drm.h (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i810_drm.h b/include/uapi/drm/i810_drm.h deleted file mode 100644 index d285d5e72e6a..000000000000 --- a/include/uapi/drm/i810_drm.h +++ /dev/null @@ -1,292 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _I810_DRM_H_ -#define _I810_DRM_H_ - -#include "drm.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -/* WARNING: These defines must be the same as what the Xserver uses. - * if you change them, you must change the defines in the Xserver. - */ - -#ifndef _I810_DEFINES_ -#define _I810_DEFINES_ - -#define I810_DMA_BUF_ORDER 12 -#define I810_DMA_BUF_SZ (1< Date: Sat, 3 Dec 2022 18:22:53 +0800 Subject: drm: Remove the obsolete driver-mga Commit 399516ab0fee ("MAINTAINERS: Add a bunch of legacy (UMS) DRM drivers") marked mga driver obsolete 7 years ago. And the mesa UMD of this drm driver already in deprecated list in the link: https://docs.mesa3d.org/systems.html Matrox-->driver/gpu/drm/mga It's time to remove this driver. 
Signed-off-by: Cai Huoqing Acked-by: Daniel Vetter Acked-by: Dave Airlie Acked-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20221203102502.3185-3-cai.huoqing@linux.dev --- include/uapi/drm/mga_drm.h | 429 --------------------------------------------- 1 file changed, 429 deletions(-) delete mode 100644 include/uapi/drm/mga_drm.h (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/mga_drm.h b/include/uapi/drm/mga_drm.h deleted file mode 100644 index bb31567e66c0..000000000000 --- a/include/uapi/drm/mga_drm.h +++ /dev/null @@ -1,429 +0,0 @@ -/* mga_drm.h -- Public header for the Matrox g200/g400 driver -*- linux-c -*- - * Created: Tue Jan 25 01:50:01 1999 by jhartmann@precisioninsight.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jeff Hartmann - * Keith Whitwell - * - * Rewritten by: - * Gareth Hughes - */ - -#ifndef __MGA_DRM_H__ -#define __MGA_DRM_H__ - -#include "drm.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -/* WARNING: If you change any of these defines, make sure to change the - * defines in the Xserver file (mga_sarea.h) - */ - -#ifndef __MGA_SAREA_DEFINES__ -#define __MGA_SAREA_DEFINES__ - -/* WARP pipe flags - */ -#define MGA_F 0x1 /* fog */ -#define MGA_A 0x2 /* alpha */ -#define MGA_S 0x4 /* specular */ -#define MGA_T2 0x8 /* multitexture */ - -#define MGA_WARP_TGZ 0 -#define MGA_WARP_TGZF (MGA_F) -#define MGA_WARP_TGZA (MGA_A) -#define MGA_WARP_TGZAF (MGA_F|MGA_A) -#define MGA_WARP_TGZS (MGA_S) -#define MGA_WARP_TGZSF (MGA_S|MGA_F) -#define MGA_WARP_TGZSA (MGA_S|MGA_A) -#define MGA_WARP_TGZSAF (MGA_S|MGA_F|MGA_A) -#define MGA_WARP_T2GZ (MGA_T2) -#define MGA_WARP_T2GZF (MGA_T2|MGA_F) -#define MGA_WARP_T2GZA (MGA_T2|MGA_A) -#define MGA_WARP_T2GZAF (MGA_T2|MGA_A|MGA_F) -#define MGA_WARP_T2GZS (MGA_T2|MGA_S) -#define MGA_WARP_T2GZSF (MGA_T2|MGA_S|MGA_F) -#define MGA_WARP_T2GZSA (MGA_T2|MGA_S|MGA_A) -#define MGA_WARP_T2GZSAF (MGA_T2|MGA_S|MGA_F|MGA_A) - -#define MGA_MAX_G200_PIPES 8 /* no multitex */ -#define MGA_MAX_G400_PIPES 16 -#define MGA_MAX_WARP_PIPES MGA_MAX_G400_PIPES -#define MGA_WARP_UCODE_SIZE 32768 /* in bytes */ - -#define MGA_CARD_TYPE_G200 1 -#define MGA_CARD_TYPE_G400 2 -#define MGA_CARD_TYPE_G450 3 /* not currently used */ -#define MGA_CARD_TYPE_G550 4 - -#define MGA_FRONT 0x1 -#define MGA_BACK 0x2 -#define MGA_DEPTH 0x4 - -/* What needs to be changed for the current vertex dma buffer? 
- */ -#define MGA_UPLOAD_CONTEXT 0x1 -#define MGA_UPLOAD_TEX0 0x2 -#define MGA_UPLOAD_TEX1 0x4 -#define MGA_UPLOAD_PIPE 0x8 -#define MGA_UPLOAD_TEX0IMAGE 0x10 /* handled client-side */ -#define MGA_UPLOAD_TEX1IMAGE 0x20 /* handled client-side */ -#define MGA_UPLOAD_2D 0x40 -#define MGA_WAIT_AGE 0x80 /* handled client-side */ -#define MGA_UPLOAD_CLIPRECTS 0x100 /* handled client-side */ -#if 0 -#define MGA_DMA_FLUSH 0x200 /* set when someone gets the lock - quiescent */ -#endif - -/* 32 buffers of 64k each, total 2 meg. - */ -#define MGA_BUFFER_SIZE (1 << 16) -#define MGA_NUM_BUFFERS 128 - -/* Keep these small for testing. - */ -#define MGA_NR_SAREA_CLIPRECTS 8 - -/* 2 heaps (1 for card, 1 for agp), each divided into up to 128 - * regions, subject to a minimum region size of (1<<16) == 64k. - * - * Clients may subdivide regions internally, but when sharing between - * clients, the region size is the minimum granularity. - */ - -#define MGA_CARD_HEAP 0 -#define MGA_AGP_HEAP 1 -#define MGA_NR_TEX_HEAPS 2 -#define MGA_NR_TEX_REGIONS 16 -#define MGA_LOG_MIN_TEX_REGION_SIZE 16 - -#define DRM_MGA_IDLE_RETRY 2048 - -#endif /* __MGA_SAREA_DEFINES__ */ - -/* Setup registers for 3D context - */ -typedef struct { - unsigned int dstorg; - unsigned int maccess; - unsigned int plnwt; - unsigned int dwgctl; - unsigned int alphactrl; - unsigned int fogcolor; - unsigned int wflag; - unsigned int tdualstage0; - unsigned int tdualstage1; - unsigned int fcol; - unsigned int stencil; - unsigned int stencilctl; -} drm_mga_context_regs_t; - -/* Setup registers for 2D, X server - */ -typedef struct { - unsigned int pitch; -} drm_mga_server_regs_t; - -/* Setup registers for each texture unit - */ -typedef struct { - unsigned int texctl; - unsigned int texctl2; - unsigned int texfilter; - unsigned int texbordercol; - unsigned int texorg; - unsigned int texwidth; - unsigned int texheight; - unsigned int texorg1; - unsigned int texorg2; - unsigned int texorg3; - unsigned int texorg4; -} 
drm_mga_texture_regs_t; - -/* General aging mechanism - */ -typedef struct { - unsigned int head; /* Position of head pointer */ - unsigned int wrap; /* Primary DMA wrap count */ -} drm_mga_age_t; - -typedef struct _drm_mga_sarea { - /* The channel for communication of state information to the kernel - * on firing a vertex dma buffer. - */ - drm_mga_context_regs_t context_state; - drm_mga_server_regs_t server_state; - drm_mga_texture_regs_t tex_state[2]; - unsigned int warp_pipe; - unsigned int dirty; - unsigned int vertsize; - - /* The current cliprects, or a subset thereof. - */ - struct drm_clip_rect boxes[MGA_NR_SAREA_CLIPRECTS]; - unsigned int nbox; - - /* Information about the most recently used 3d drawable. The - * client fills in the req_* fields, the server fills in the - * exported_ fields and puts the cliprects into boxes, above. - * - * The client clears the exported_drawable field before - * clobbering the boxes data. - */ - unsigned int req_drawable; /* the X drawable id */ - unsigned int req_draw_buffer; /* MGA_FRONT or MGA_BACK */ - - unsigned int exported_drawable; - unsigned int exported_index; - unsigned int exported_stamp; - unsigned int exported_buffers; - unsigned int exported_nfront; - unsigned int exported_nback; - int exported_back_x, exported_front_x, exported_w; - int exported_back_y, exported_front_y, exported_h; - struct drm_clip_rect exported_boxes[MGA_NR_SAREA_CLIPRECTS]; - - /* Counters for aging textures and for client-side throttling. - */ - unsigned int status[4]; - unsigned int last_wrap; - - drm_mga_age_t last_frame; - unsigned int last_enqueue; /* last time a buffer was enqueued */ - unsigned int last_dispatch; /* age of the most recently dispatched buffer */ - unsigned int last_quiescent; /* */ - - /* LRU lists for texture memory in agp space and on the card. - */ - struct drm_tex_region texList[MGA_NR_TEX_HEAPS][MGA_NR_TEX_REGIONS + 1]; - unsigned int texAge[MGA_NR_TEX_HEAPS]; - - /* Mechanism to validate card state. 
- */ - int ctxOwner; -} drm_mga_sarea_t; - -/* MGA specific ioctls - * The device specific ioctl range is 0x40 to 0x79. - */ -#define DRM_MGA_INIT 0x00 -#define DRM_MGA_FLUSH 0x01 -#define DRM_MGA_RESET 0x02 -#define DRM_MGA_SWAP 0x03 -#define DRM_MGA_CLEAR 0x04 -#define DRM_MGA_VERTEX 0x05 -#define DRM_MGA_INDICES 0x06 -#define DRM_MGA_ILOAD 0x07 -#define DRM_MGA_BLIT 0x08 -#define DRM_MGA_GETPARAM 0x09 - -/* 3.2: - * ioctls for operating on fences. - */ -#define DRM_MGA_SET_FENCE 0x0a -#define DRM_MGA_WAIT_FENCE 0x0b -#define DRM_MGA_DMA_BOOTSTRAP 0x0c - -#define DRM_IOCTL_MGA_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_INIT, drm_mga_init_t) -#define DRM_IOCTL_MGA_FLUSH DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_FLUSH, struct drm_lock) -#define DRM_IOCTL_MGA_RESET DRM_IO( DRM_COMMAND_BASE + DRM_MGA_RESET) -#define DRM_IOCTL_MGA_SWAP DRM_IO( DRM_COMMAND_BASE + DRM_MGA_SWAP) -#define DRM_IOCTL_MGA_CLEAR DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_CLEAR, drm_mga_clear_t) -#define DRM_IOCTL_MGA_VERTEX DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_VERTEX, drm_mga_vertex_t) -#define DRM_IOCTL_MGA_INDICES DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_INDICES, drm_mga_indices_t) -#define DRM_IOCTL_MGA_ILOAD DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_ILOAD, drm_mga_iload_t) -#define DRM_IOCTL_MGA_BLIT DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_BLIT, drm_mga_blit_t) -#define DRM_IOCTL_MGA_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MGA_GETPARAM, drm_mga_getparam_t) -#define DRM_IOCTL_MGA_SET_FENCE DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_SET_FENCE, __u32) -#define DRM_IOCTL_MGA_WAIT_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_MGA_WAIT_FENCE, __u32) -#define DRM_IOCTL_MGA_DMA_BOOTSTRAP DRM_IOWR(DRM_COMMAND_BASE + DRM_MGA_DMA_BOOTSTRAP, drm_mga_dma_bootstrap_t) - -typedef struct _drm_mga_warp_index { - int installed; - unsigned long phys_addr; - int size; -} drm_mga_warp_index_t; - -typedef struct drm_mga_init { - enum { - MGA_INIT_DMA = 0x01, - MGA_CLEANUP_DMA = 0x02 - } func; - - unsigned long sarea_priv_offset; - - __struct_group(/* no 
tag */, always32bit, /* no attrs */, - int chipset; - int sgram; - - unsigned int maccess; - - unsigned int fb_cpp; - unsigned int front_offset, front_pitch; - unsigned int back_offset, back_pitch; - - unsigned int depth_cpp; - unsigned int depth_offset, depth_pitch; - - unsigned int texture_offset[MGA_NR_TEX_HEAPS]; - unsigned int texture_size[MGA_NR_TEX_HEAPS]; - ); - - unsigned long fb_offset; - unsigned long mmio_offset; - unsigned long status_offset; - unsigned long warp_offset; - unsigned long primary_offset; - unsigned long buffers_offset; -} drm_mga_init_t; - -typedef struct drm_mga_dma_bootstrap { - /** - * \name AGP texture region - * - * On return from the DRM_MGA_DMA_BOOTSTRAP ioctl, these fields will - * be filled in with the actual AGP texture settings. - * - * \warning - * If these fields are non-zero, but dma_mga_dma_bootstrap::agp_mode - * is zero, it means that PCI memory (most likely through the use of - * an IOMMU) is being used for "AGP" textures. - */ - /*@{ */ - unsigned long texture_handle; /**< Handle used to map AGP textures. */ - __u32 texture_size; /**< Size of the AGP texture region. */ - /*@} */ - - /** - * Requested size of the primary DMA region. - * - * On return from the DRM_MGA_DMA_BOOTSTRAP ioctl, this field will be - * filled in with the actual AGP mode. If AGP was not available - */ - __u32 primary_size; - - /** - * Requested number of secondary DMA buffers. - * - * On return from the DRM_MGA_DMA_BOOTSTRAP ioctl, this field will be - * filled in with the actual number of secondary DMA buffers - * allocated. Particularly when PCI DMA is used, this may be - * (subtantially) less than the number requested. - */ - __u32 secondary_bin_count; - - /** - * Requested size of each secondary DMA buffer. - * - * While the kernel \b is free to reduce - * dma_mga_dma_bootstrap::secondary_bin_count, it is \b not allowed - * to reduce dma_mga_dma_bootstrap::secondary_bin_size. 
- */ - __u32 secondary_bin_size; - - /** - * Bit-wise mask of AGPSTAT2_* values. Currently only \c AGPSTAT2_1X, - * \c AGPSTAT2_2X, and \c AGPSTAT2_4X are supported. If this value is - * zero, it means that PCI DMA should be used, even if AGP is - * possible. - * - * On return from the DRM_MGA_DMA_BOOTSTRAP ioctl, this field will be - * filled in with the actual AGP mode. If AGP was not available - * (i.e., PCI DMA was used), this value will be zero. - */ - __u32 agp_mode; - - /** - * Desired AGP GART size, measured in megabytes. - */ - __u8 agp_size; -} drm_mga_dma_bootstrap_t; - -typedef struct drm_mga_clear { - unsigned int flags; - unsigned int clear_color; - unsigned int clear_depth; - unsigned int color_mask; - unsigned int depth_mask; -} drm_mga_clear_t; - -typedef struct drm_mga_vertex { - int idx; /* buffer to queue */ - int used; /* bytes in use */ - int discard; /* client finished with buffer? */ -} drm_mga_vertex_t; - -typedef struct drm_mga_indices { - int idx; /* buffer to queue */ - unsigned int start; - unsigned int end; - int discard; /* client finished with buffer? */ -} drm_mga_indices_t; - -typedef struct drm_mga_iload { - int idx; - unsigned int dstorg; - unsigned int length; -} drm_mga_iload_t; - -typedef struct _drm_mga_blit { - unsigned int planemask; - unsigned int srcorg; - unsigned int dstorg; - int src_pitch, dst_pitch; - int delta_sx, delta_sy; - int delta_dx, delta_dy; - int height, ydir; /* flip image vertically */ - int source_pitch, dest_pitch; -} drm_mga_blit_t; - -/* 3.1: An ioctl to get parameters that aren't available to the 3d - * client any other way. - */ -#define MGA_PARAM_IRQ_NR 1 - -/* 3.2: Query the actual card type. The DDX only distinguishes between - * G200 chips and non-G200 chips, which it calls G400. It turns out that - * there are some very sublte differences between the G4x0 chips and the G550 - * chips. Using this parameter query, a client-side driver can detect the - * difference between a G4x0 and a G550. 
- */ -#define MGA_PARAM_CARD_TYPE 2 - -typedef struct drm_mga_getparam { - int param; - void __user *value; -} drm_mga_getparam_t; - -#if defined(__cplusplus) -} -#endif - -#endif -- cgit v1.2.3 From 28483b8666bfe7d0ec34cfc492d77e64f97f6de1 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Sat, 3 Dec 2022 18:22:54 +0800 Subject: drm: Remove the obsolete driver-r128 Commit 399516ab0fee ("MAINTAINERS: Add a bunch of legacy (UMS) DRM drivers") marked r128 driver obsolete 7 years ago. And the mesa UMD of this drm driver already in deprecated list in the link: https://docs.mesa3d.org/systems.html ATI Rage 128->drivers/gpu/drm/r128 It's time to remove this driver. Signed-off-by: Cai Huoqing Acked-by: Daniel Vetter Acked-by: Dave Airlie Acked-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20221203102502.3185-4-cai.huoqing@linux.dev --- include/uapi/drm/r128_drm.h | 336 -------------------------------------------- 1 file changed, 336 deletions(-) delete mode 100644 include/uapi/drm/r128_drm.h (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/r128_drm.h b/include/uapi/drm/r128_drm.h deleted file mode 100644 index 690e9c62f510..000000000000 --- a/include/uapi/drm/r128_drm.h +++ /dev/null @@ -1,336 +0,0 @@ -/* r128_drm.h -- Public header for the r128 driver -*- linux-c -*- - * Created: Wed Apr 5 19:24:19 2000 by kevin@precisioninsight.com - */ -/* - * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All rights reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Gareth Hughes - * Kevin E. Martin - */ - -#ifndef __R128_DRM_H__ -#define __R128_DRM_H__ - -#include "drm.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -/* WARNING: If you change any of these defines, make sure to change the - * defines in the X server file (r128_sarea.h) - */ -#ifndef __R128_SAREA_DEFINES__ -#define __R128_SAREA_DEFINES__ - -/* What needs to be changed for the current vertex buffer? 
- */ -#define R128_UPLOAD_CONTEXT 0x001 -#define R128_UPLOAD_SETUP 0x002 -#define R128_UPLOAD_TEX0 0x004 -#define R128_UPLOAD_TEX1 0x008 -#define R128_UPLOAD_TEX0IMAGES 0x010 -#define R128_UPLOAD_TEX1IMAGES 0x020 -#define R128_UPLOAD_CORE 0x040 -#define R128_UPLOAD_MASKS 0x080 -#define R128_UPLOAD_WINDOW 0x100 -#define R128_UPLOAD_CLIPRECTS 0x200 /* handled client-side */ -#define R128_REQUIRE_QUIESCENCE 0x400 -#define R128_UPLOAD_ALL 0x7ff - -#define R128_FRONT 0x1 -#define R128_BACK 0x2 -#define R128_DEPTH 0x4 - -/* Primitive types - */ -#define R128_POINTS 0x1 -#define R128_LINES 0x2 -#define R128_LINE_STRIP 0x3 -#define R128_TRIANGLES 0x4 -#define R128_TRIANGLE_FAN 0x5 -#define R128_TRIANGLE_STRIP 0x6 - -/* Vertex/indirect buffer size - */ -#define R128_BUFFER_SIZE 16384 - -/* Byte offsets for indirect buffer data - */ -#define R128_INDEX_PRIM_OFFSET 20 -#define R128_HOSTDATA_BLIT_OFFSET 32 - -/* Keep these small for testing. - */ -#define R128_NR_SAREA_CLIPRECTS 12 - -/* There are 2 heaps (local/AGP). Each region within a heap is a - * minimum of 64k, and there are at most 64 of them per heap. 
- */ -#define R128_LOCAL_TEX_HEAP 0 -#define R128_AGP_TEX_HEAP 1 -#define R128_NR_TEX_HEAPS 2 -#define R128_NR_TEX_REGIONS 64 -#define R128_LOG_TEX_GRANULARITY 16 - -#define R128_NR_CONTEXT_REGS 12 - -#define R128_MAX_TEXTURE_LEVELS 11 -#define R128_MAX_TEXTURE_UNITS 2 - -#endif /* __R128_SAREA_DEFINES__ */ - -typedef struct { - /* Context state - can be written in one large chunk */ - unsigned int dst_pitch_offset_c; - unsigned int dp_gui_master_cntl_c; - unsigned int sc_top_left_c; - unsigned int sc_bottom_right_c; - unsigned int z_offset_c; - unsigned int z_pitch_c; - unsigned int z_sten_cntl_c; - unsigned int tex_cntl_c; - unsigned int misc_3d_state_cntl_reg; - unsigned int texture_clr_cmp_clr_c; - unsigned int texture_clr_cmp_msk_c; - unsigned int fog_color_c; - - /* Texture state */ - unsigned int tex_size_pitch_c; - unsigned int constant_color_c; - - /* Setup state */ - unsigned int pm4_vc_fpu_setup; - unsigned int setup_cntl; - - /* Mask state */ - unsigned int dp_write_mask; - unsigned int sten_ref_mask_c; - unsigned int plane_3d_mask_c; - - /* Window state */ - unsigned int window_xy_offset; - - /* Core state */ - unsigned int scale_3d_cntl; -} drm_r128_context_regs_t; - -/* Setup registers for each texture unit - */ -typedef struct { - unsigned int tex_cntl; - unsigned int tex_combine_cntl; - unsigned int tex_size_pitch; - unsigned int tex_offset[R128_MAX_TEXTURE_LEVELS]; - unsigned int tex_border_color; -} drm_r128_texture_regs_t; - -typedef struct drm_r128_sarea { - /* The channel for communication of state information to the kernel - * on firing a vertex buffer. - */ - drm_r128_context_regs_t context_state; - drm_r128_texture_regs_t tex_state[R128_MAX_TEXTURE_UNITS]; - unsigned int dirty; - unsigned int vertsize; - unsigned int vc_format; - - /* The current cliprects, or a subset thereof. - */ - struct drm_clip_rect boxes[R128_NR_SAREA_CLIPRECTS]; - unsigned int nbox; - - /* Counters for client-side throttling of rendering clients. 
- */ - unsigned int last_frame; - unsigned int last_dispatch; - - struct drm_tex_region tex_list[R128_NR_TEX_HEAPS][R128_NR_TEX_REGIONS + 1]; - unsigned int tex_age[R128_NR_TEX_HEAPS]; - int ctx_owner; - int pfAllowPageFlip; /* number of 3d windows (0,1,2 or more) */ - int pfCurrentPage; /* which buffer is being displayed? */ -} drm_r128_sarea_t; - -/* WARNING: If you change any of these defines, make sure to change the - * defines in the Xserver file (xf86drmR128.h) - */ - -/* Rage 128 specific ioctls - * The device specific ioctl range is 0x40 to 0x79. - */ -#define DRM_R128_INIT 0x00 -#define DRM_R128_CCE_START 0x01 -#define DRM_R128_CCE_STOP 0x02 -#define DRM_R128_CCE_RESET 0x03 -#define DRM_R128_CCE_IDLE 0x04 -/* 0x05 not used */ -#define DRM_R128_RESET 0x06 -#define DRM_R128_SWAP 0x07 -#define DRM_R128_CLEAR 0x08 -#define DRM_R128_VERTEX 0x09 -#define DRM_R128_INDICES 0x0a -#define DRM_R128_BLIT 0x0b -#define DRM_R128_DEPTH 0x0c -#define DRM_R128_STIPPLE 0x0d -/* 0x0e not used */ -#define DRM_R128_INDIRECT 0x0f -#define DRM_R128_FULLSCREEN 0x10 -#define DRM_R128_CLEAR2 0x11 -#define DRM_R128_GETPARAM 0x12 -#define DRM_R128_FLIP 0x13 - -#define DRM_IOCTL_R128_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_R128_INIT, drm_r128_init_t) -#define DRM_IOCTL_R128_CCE_START DRM_IO( DRM_COMMAND_BASE + DRM_R128_CCE_START) -#define DRM_IOCTL_R128_CCE_STOP DRM_IOW( DRM_COMMAND_BASE + DRM_R128_CCE_STOP, drm_r128_cce_stop_t) -#define DRM_IOCTL_R128_CCE_RESET DRM_IO( DRM_COMMAND_BASE + DRM_R128_CCE_RESET) -#define DRM_IOCTL_R128_CCE_IDLE DRM_IO( DRM_COMMAND_BASE + DRM_R128_CCE_IDLE) -/* 0x05 not used */ -#define DRM_IOCTL_R128_RESET DRM_IO( DRM_COMMAND_BASE + DRM_R128_RESET) -#define DRM_IOCTL_R128_SWAP DRM_IO( DRM_COMMAND_BASE + DRM_R128_SWAP) -#define DRM_IOCTL_R128_CLEAR DRM_IOW( DRM_COMMAND_BASE + DRM_R128_CLEAR, drm_r128_clear_t) -#define DRM_IOCTL_R128_VERTEX DRM_IOW( DRM_COMMAND_BASE + DRM_R128_VERTEX, drm_r128_vertex_t) -#define DRM_IOCTL_R128_INDICES DRM_IOW( DRM_COMMAND_BASE 
+ DRM_R128_INDICES, drm_r128_indices_t) -#define DRM_IOCTL_R128_BLIT DRM_IOW( DRM_COMMAND_BASE + DRM_R128_BLIT, drm_r128_blit_t) -#define DRM_IOCTL_R128_DEPTH DRM_IOW( DRM_COMMAND_BASE + DRM_R128_DEPTH, drm_r128_depth_t) -#define DRM_IOCTL_R128_STIPPLE DRM_IOW( DRM_COMMAND_BASE + DRM_R128_STIPPLE, drm_r128_stipple_t) -/* 0x0e not used */ -#define DRM_IOCTL_R128_INDIRECT DRM_IOWR(DRM_COMMAND_BASE + DRM_R128_INDIRECT, drm_r128_indirect_t) -#define DRM_IOCTL_R128_FULLSCREEN DRM_IOW( DRM_COMMAND_BASE + DRM_R128_FULLSCREEN, drm_r128_fullscreen_t) -#define DRM_IOCTL_R128_CLEAR2 DRM_IOW( DRM_COMMAND_BASE + DRM_R128_CLEAR2, drm_r128_clear2_t) -#define DRM_IOCTL_R128_GETPARAM DRM_IOWR( DRM_COMMAND_BASE + DRM_R128_GETPARAM, drm_r128_getparam_t) -#define DRM_IOCTL_R128_FLIP DRM_IO( DRM_COMMAND_BASE + DRM_R128_FLIP) - -typedef struct drm_r128_init { - enum { - R128_INIT_CCE = 0x01, - R128_CLEANUP_CCE = 0x02 - } func; - unsigned long sarea_priv_offset; - int is_pci; - int cce_mode; - int cce_secure; - int ring_size; - int usec_timeout; - - unsigned int fb_bpp; - unsigned int front_offset, front_pitch; - unsigned int back_offset, back_pitch; - unsigned int depth_bpp; - unsigned int depth_offset, depth_pitch; - unsigned int span_offset; - - unsigned long fb_offset; - unsigned long mmio_offset; - unsigned long ring_offset; - unsigned long ring_rptr_offset; - unsigned long buffers_offset; - unsigned long agp_textures_offset; -} drm_r128_init_t; - -typedef struct drm_r128_cce_stop { - int flush; - int idle; -} drm_r128_cce_stop_t; - -typedef struct drm_r128_clear { - unsigned int flags; - unsigned int clear_color; - unsigned int clear_depth; - unsigned int color_mask; - unsigned int depth_mask; -} drm_r128_clear_t; - -typedef struct drm_r128_vertex { - int prim; - int idx; /* Index of vertex buffer */ - int count; /* Number of vertices in buffer */ - int discard; /* Client finished with buffer? 
*/ -} drm_r128_vertex_t; - -typedef struct drm_r128_indices { - int prim; - int idx; - int start; - int end; - int discard; /* Client finished with buffer? */ -} drm_r128_indices_t; - -typedef struct drm_r128_blit { - int idx; - int pitch; - int offset; - int format; - unsigned short x, y; - unsigned short width, height; -} drm_r128_blit_t; - -typedef struct drm_r128_depth { - enum { - R128_WRITE_SPAN = 0x01, - R128_WRITE_PIXELS = 0x02, - R128_READ_SPAN = 0x03, - R128_READ_PIXELS = 0x04 - } func; - int n; - int __user *x; - int __user *y; - unsigned int __user *buffer; - unsigned char __user *mask; -} drm_r128_depth_t; - -typedef struct drm_r128_stipple { - unsigned int __user *mask; -} drm_r128_stipple_t; - -typedef struct drm_r128_indirect { - int idx; - int start; - int end; - int discard; -} drm_r128_indirect_t; - -typedef struct drm_r128_fullscreen { - enum { - R128_INIT_FULLSCREEN = 0x01, - R128_CLEANUP_FULLSCREEN = 0x02 - } func; -} drm_r128_fullscreen_t; - -/* 2.3: An ioctl to get parameters that aren't available to the 3d - * client any other way. - */ -#define R128_PARAM_IRQ_NR 1 - -typedef struct drm_r128_getparam { - int param; - void __user *value; -} drm_r128_getparam_t; - -#if defined(__cplusplus) -} -#endif - -#endif -- cgit v1.2.3 From 7872bc2cb13e4dd83d193d50a835e179f449ab07 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Sat, 3 Dec 2022 18:22:55 +0800 Subject: drm: Remove the obsolete driver-savage Commit 399516ab0fee ("MAINTAINERS: Add a bunch of legacy (UMS) DRM drivers") marked savage driver obsolete 7 years ago. And the mesa UMD of this drm driver already in deprecated list in the link: https://docs.mesa3d.org/systems.html S3 Savage-->drivers/gpu/drm/savage It's time to remove this driver. 
Signed-off-by: Cai Huoqing Acked-by: Daniel Vetter Acked-by: Dave Airlie Acked-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20221203102502.3185-5-cai.huoqing@linux.dev --- include/uapi/drm/savage_drm.h | 220 ------------------------------------------ 1 file changed, 220 deletions(-) delete mode 100644 include/uapi/drm/savage_drm.h (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/savage_drm.h b/include/uapi/drm/savage_drm.h deleted file mode 100644 index 0f6eddef74aa..000000000000 --- a/include/uapi/drm/savage_drm.h +++ /dev/null @@ -1,220 +0,0 @@ -/* savage_drm.h -- Public header for the savage driver - * - * Copyright 2004 Felix Kuehling - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sub license, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef __SAVAGE_DRM_H__ -#define __SAVAGE_DRM_H__ - -#include "drm.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -#ifndef __SAVAGE_SAREA_DEFINES__ -#define __SAVAGE_SAREA_DEFINES__ - -/* 2 heaps (1 for card, 1 for agp), each divided into up to 128 - * regions, subject to a minimum region size of (1<<16) == 64k. - * - * Clients may subdivide regions internally, but when sharing between - * clients, the region size is the minimum granularity. - */ - -#define SAVAGE_CARD_HEAP 0 -#define SAVAGE_AGP_HEAP 1 -#define SAVAGE_NR_TEX_HEAPS 2 -#define SAVAGE_NR_TEX_REGIONS 16 -#define SAVAGE_LOG_MIN_TEX_REGION_SIZE 16 - -#endif /* __SAVAGE_SAREA_DEFINES__ */ - -typedef struct _drm_savage_sarea { - /* LRU lists for texture memory in agp space and on the card. - */ - struct drm_tex_region texList[SAVAGE_NR_TEX_HEAPS][SAVAGE_NR_TEX_REGIONS + - 1]; - unsigned int texAge[SAVAGE_NR_TEX_HEAPS]; - - /* Mechanism to validate card state. - */ - int ctxOwner; -} drm_savage_sarea_t, *drm_savage_sarea_ptr; - -/* Savage-specific ioctls - */ -#define DRM_SAVAGE_BCI_INIT 0x00 -#define DRM_SAVAGE_BCI_CMDBUF 0x01 -#define DRM_SAVAGE_BCI_EVENT_EMIT 0x02 -#define DRM_SAVAGE_BCI_EVENT_WAIT 0x03 - -#define DRM_IOCTL_SAVAGE_BCI_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_INIT, drm_savage_init_t) -#define DRM_IOCTL_SAVAGE_BCI_CMDBUF DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_CMDBUF, drm_savage_cmdbuf_t) -#define DRM_IOCTL_SAVAGE_BCI_EVENT_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_EMIT, drm_savage_event_emit_t) -#define DRM_IOCTL_SAVAGE_BCI_EVENT_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_WAIT, drm_savage_event_wait_t) - -#define SAVAGE_DMA_PCI 1 -#define SAVAGE_DMA_AGP 3 -typedef struct drm_savage_init { - enum { - SAVAGE_INIT_BCI = 1, - SAVAGE_CLEANUP_BCI = 2 - } func; - unsigned int sarea_priv_offset; - - /* some parameters */ - unsigned int cob_size; - unsigned int bci_threshold_lo, bci_threshold_hi; - unsigned int dma_type; - - /* frame buffer layout 
*/ - unsigned int fb_bpp; - unsigned int front_offset, front_pitch; - unsigned int back_offset, back_pitch; - unsigned int depth_bpp; - unsigned int depth_offset, depth_pitch; - - /* local textures */ - unsigned int texture_offset; - unsigned int texture_size; - - /* physical locations of non-permanent maps */ - unsigned long status_offset; - unsigned long buffers_offset; - unsigned long agp_textures_offset; - unsigned long cmd_dma_offset; -} drm_savage_init_t; - -typedef union drm_savage_cmd_header drm_savage_cmd_header_t; -typedef struct drm_savage_cmdbuf { - /* command buffer in client's address space */ - drm_savage_cmd_header_t __user *cmd_addr; - unsigned int size; /* size of the command buffer in 64bit units */ - - unsigned int dma_idx; /* DMA buffer index to use */ - int discard; /* discard DMA buffer when done */ - /* vertex buffer in client's address space */ - unsigned int __user *vb_addr; - unsigned int vb_size; /* size of client vertex buffer in bytes */ - unsigned int vb_stride; /* stride of vertices in 32bit words */ - /* boxes in client's address space */ - struct drm_clip_rect __user *box_addr; - unsigned int nbox; /* number of clipping boxes */ -} drm_savage_cmdbuf_t; - -#define SAVAGE_WAIT_2D 0x1 /* wait for 2D idle before updating event tag */ -#define SAVAGE_WAIT_3D 0x2 /* wait for 3D idle before updating event tag */ -#define SAVAGE_WAIT_IRQ 0x4 /* emit or wait for IRQ, not implemented yet */ -typedef struct drm_savage_event { - unsigned int count; - unsigned int flags; -} drm_savage_event_emit_t, drm_savage_event_wait_t; - -/* Commands for the cmdbuf ioctl - */ -#define SAVAGE_CMD_STATE 0 /* a range of state registers */ -#define SAVAGE_CMD_DMA_PRIM 1 /* vertices from DMA buffer */ -#define SAVAGE_CMD_VB_PRIM 2 /* vertices from client vertex buffer */ -#define SAVAGE_CMD_DMA_IDX 3 /* indexed vertices from DMA buffer */ -#define SAVAGE_CMD_VB_IDX 4 /* indexed vertices client vertex buffer */ -#define SAVAGE_CMD_CLEAR 5 /* clear buffers */ 
-#define SAVAGE_CMD_SWAP 6 /* swap buffers */ - -/* Primitive types -*/ -#define SAVAGE_PRIM_TRILIST 0 /* triangle list */ -#define SAVAGE_PRIM_TRISTRIP 1 /* triangle strip */ -#define SAVAGE_PRIM_TRIFAN 2 /* triangle fan */ -#define SAVAGE_PRIM_TRILIST_201 3 /* reorder verts for correct flat - * shading on s3d */ - -/* Skip flags (vertex format) - */ -#define SAVAGE_SKIP_Z 0x01 -#define SAVAGE_SKIP_W 0x02 -#define SAVAGE_SKIP_C0 0x04 -#define SAVAGE_SKIP_C1 0x08 -#define SAVAGE_SKIP_S0 0x10 -#define SAVAGE_SKIP_T0 0x20 -#define SAVAGE_SKIP_ST0 0x30 -#define SAVAGE_SKIP_S1 0x40 -#define SAVAGE_SKIP_T1 0x80 -#define SAVAGE_SKIP_ST1 0xc0 -#define SAVAGE_SKIP_ALL_S3D 0x3f -#define SAVAGE_SKIP_ALL_S4 0xff - -/* Buffer names for clear command - */ -#define SAVAGE_FRONT 0x1 -#define SAVAGE_BACK 0x2 -#define SAVAGE_DEPTH 0x4 - -/* 64-bit command header - */ -union drm_savage_cmd_header { - struct { - unsigned char cmd; /* command */ - unsigned char pad0; - unsigned short pad1; - unsigned short pad2; - unsigned short pad3; - } cmd; /* generic */ - struct { - unsigned char cmd; - unsigned char global; /* need idle engine? 
*/ - unsigned short count; /* number of consecutive registers */ - unsigned short start; /* first register */ - unsigned short pad3; - } state; /* SAVAGE_CMD_STATE */ - struct { - unsigned char cmd; - unsigned char prim; /* primitive type */ - unsigned short skip; /* vertex format (skip flags) */ - unsigned short count; /* number of vertices */ - unsigned short start; /* first vertex in DMA/vertex buffer */ - } prim; /* SAVAGE_CMD_DMA_PRIM, SAVAGE_CMD_VB_PRIM */ - struct { - unsigned char cmd; - unsigned char prim; - unsigned short skip; - unsigned short count; /* number of indices that follow */ - unsigned short pad3; - } idx; /* SAVAGE_CMD_DMA_IDX, SAVAGE_CMD_VB_IDX */ - struct { - unsigned char cmd; - unsigned char pad0; - unsigned short pad1; - unsigned int flags; - } clear0; /* SAVAGE_CMD_CLEAR */ - struct { - unsigned int mask; - unsigned int value; - } clear1; /* SAVAGE_CMD_CLEAR data */ -}; - -#if defined(__cplusplus) -} -#endif - -#endif -- cgit v1.2.3 From 20efabc2e80be1df79510b8be9ca004d3ce9be11 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Sat, 3 Dec 2022 18:22:56 +0800 Subject: drm: Remove the obsolete driver-sis Commit 399516ab0fee ("MAINTAINERS: Add a bunch of legacy (UMS) DRM drivers") marked sis driver obsolete 7 years ago. And the mesa UMD of this drm driver already in deprecated list in the link: https://docs.mesa3d.org/systems.html Silicon Integrated Systems->drivers/gpu/drm/sis It's time to remove this driver. 
Signed-off-by: Cai Huoqing Acked-by: Daniel Vetter Acked-by: Dave Airlie Acked-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20221203102502.3185-6-cai.huoqing@linux.dev --- include/uapi/drm/sis_drm.h | 77 ---------------------------------------------- 1 file changed, 77 deletions(-) delete mode 100644 include/uapi/drm/sis_drm.h (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/sis_drm.h b/include/uapi/drm/sis_drm.h deleted file mode 100644 index 3e3f7e989e0b..000000000000 --- a/include/uapi/drm/sis_drm.h +++ /dev/null @@ -1,77 +0,0 @@ -/* sis_drv.h -- Private header for sis driver -*- linux-c -*- */ -/* - * Copyright 2005 Eric Anholt - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - */ - -#ifndef __SIS_DRM_H__ -#define __SIS_DRM_H__ - -#include "drm.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -/* SiS specific ioctls */ -#define NOT_USED_0_3 -#define DRM_SIS_FB_ALLOC 0x04 -#define DRM_SIS_FB_FREE 0x05 -#define NOT_USED_6_12 -#define DRM_SIS_AGP_INIT 0x13 -#define DRM_SIS_AGP_ALLOC 0x14 -#define DRM_SIS_AGP_FREE 0x15 -#define DRM_SIS_FB_INIT 0x16 - -#define DRM_IOCTL_SIS_FB_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_SIS_FB_ALLOC, drm_sis_mem_t) -#define DRM_IOCTL_SIS_FB_FREE DRM_IOW( DRM_COMMAND_BASE + DRM_SIS_FB_FREE, drm_sis_mem_t) -#define DRM_IOCTL_SIS_AGP_INIT DRM_IOWR(DRM_COMMAND_BASE + DRM_SIS_AGP_INIT, drm_sis_agp_t) -#define DRM_IOCTL_SIS_AGP_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_SIS_AGP_ALLOC, drm_sis_mem_t) -#define DRM_IOCTL_SIS_AGP_FREE DRM_IOW( DRM_COMMAND_BASE + DRM_SIS_AGP_FREE, drm_sis_mem_t) -#define DRM_IOCTL_SIS_FB_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_SIS_FB_INIT, drm_sis_fb_t) -/* -#define DRM_IOCTL_SIS_FLIP DRM_IOW( 0x48, drm_sis_flip_t) -#define DRM_IOCTL_SIS_FLIP_INIT DRM_IO( 0x49) -#define DRM_IOCTL_SIS_FLIP_FINAL DRM_IO( 0x50) -*/ - -typedef struct { - int context; - unsigned long offset; - unsigned long size; - unsigned long free; -} drm_sis_mem_t; - -typedef struct { - unsigned long offset, size; -} drm_sis_agp_t; - -typedef struct { - unsigned long offset, size; -} drm_sis_fb_t; - -#if defined(__cplusplus) -} -#endif - -#endif /* __SIS_DRM_H__ */ -- cgit v1.2.3 From 8391e000065d4fac88548e071fc43c3e07cb7047 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Sat, 3 Dec 2022 18:22:58 +0800 Subject: drm: Remove the obsolete driver-via Commit 399516ab0fee ("MAINTAINERS: Add a bunch of legacy (UMS) DRM drivers") marked via driver obsolete 7 years ago. And the mesa UMD of this drm driver already in deprecated list in the link: https://docs.mesa3d.org/systems.html VIA Unichrome->drivers/gpu/drm/via It's time to remove this driver. 
Signed-off-by: Cai Huoqing Acked-by: Daniel Vetter Acked-by: Dave Airlie Acked-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20221203102502.3185-8-cai.huoqing@linux.dev --- include/uapi/drm/via_drm.h | 282 --------------------------------------------- 1 file changed, 282 deletions(-) delete mode 100644 include/uapi/drm/via_drm.h (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/via_drm.h b/include/uapi/drm/via_drm.h deleted file mode 100644 index a1e125d42208..000000000000 --- a/include/uapi/drm/via_drm.h +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved. - * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sub license, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ -#ifndef _VIA_DRM_H_ -#define _VIA_DRM_H_ - -#include "drm.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -/* WARNING: These defines must be the same as what the Xserver uses. - * if you change them, you must change the defines in the Xserver. - */ - -#ifndef _VIA_DEFINES_ -#define _VIA_DEFINES_ - - -#define VIA_NR_SAREA_CLIPRECTS 8 -#define VIA_NR_XVMC_PORTS 10 -#define VIA_NR_XVMC_LOCKS 5 -#define VIA_MAX_CACHELINE_SIZE 64 -#define XVMCLOCKPTR(saPriv,lockNo) \ - ((volatile struct drm_hw_lock *)(((((unsigned long) (saPriv)->XvMCLockArea) + \ - (VIA_MAX_CACHELINE_SIZE - 1)) & \ - ~(VIA_MAX_CACHELINE_SIZE - 1)) + \ - VIA_MAX_CACHELINE_SIZE*(lockNo))) - -/* Each region is a minimum of 64k, and there are at most 64 of them. - */ -#define VIA_NR_TEX_REGIONS 64 -#define VIA_LOG_MIN_TEX_REGION_SIZE 16 -#endif - -#define VIA_UPLOAD_TEX0IMAGE 0x1 /* handled clientside */ -#define VIA_UPLOAD_TEX1IMAGE 0x2 /* handled clientside */ -#define VIA_UPLOAD_CTX 0x4 -#define VIA_UPLOAD_BUFFERS 0x8 -#define VIA_UPLOAD_TEX0 0x10 -#define VIA_UPLOAD_TEX1 0x20 -#define VIA_UPLOAD_CLIPRECTS 0x40 -#define VIA_UPLOAD_ALL 0xff - -/* VIA specific ioctls */ -#define DRM_VIA_ALLOCMEM 0x00 -#define DRM_VIA_FREEMEM 0x01 -#define DRM_VIA_AGP_INIT 0x02 -#define DRM_VIA_FB_INIT 0x03 -#define DRM_VIA_MAP_INIT 0x04 -#define DRM_VIA_DEC_FUTEX 0x05 -#define NOT_USED -#define DRM_VIA_DMA_INIT 0x07 -#define DRM_VIA_CMDBUFFER 0x08 -#define DRM_VIA_FLUSH 0x09 -#define DRM_VIA_PCICMD 0x0a -#define DRM_VIA_CMDBUF_SIZE 0x0b -#define NOT_USED -#define DRM_VIA_WAIT_IRQ 0x0d -#define DRM_VIA_DMA_BLIT 0x0e -#define DRM_VIA_BLIT_SYNC 0x0f - -#define DRM_IOCTL_VIA_ALLOCMEM DRM_IOWR(DRM_COMMAND_BASE + DRM_VIA_ALLOCMEM, drm_via_mem_t) -#define DRM_IOCTL_VIA_FREEMEM DRM_IOW( DRM_COMMAND_BASE + DRM_VIA_FREEMEM, drm_via_mem_t) -#define DRM_IOCTL_VIA_AGP_INIT DRM_IOWR(DRM_COMMAND_BASE + DRM_VIA_AGP_INIT, drm_via_agp_t) -#define DRM_IOCTL_VIA_FB_INIT DRM_IOWR(DRM_COMMAND_BASE + DRM_VIA_FB_INIT, drm_via_fb_t) 
-#define DRM_IOCTL_VIA_MAP_INIT DRM_IOWR(DRM_COMMAND_BASE + DRM_VIA_MAP_INIT, drm_via_init_t) -#define DRM_IOCTL_VIA_DEC_FUTEX DRM_IOW( DRM_COMMAND_BASE + DRM_VIA_DEC_FUTEX, drm_via_futex_t) -#define DRM_IOCTL_VIA_DMA_INIT DRM_IOWR(DRM_COMMAND_BASE + DRM_VIA_DMA_INIT, drm_via_dma_init_t) -#define DRM_IOCTL_VIA_CMDBUFFER DRM_IOW( DRM_COMMAND_BASE + DRM_VIA_CMDBUFFER, drm_via_cmdbuffer_t) -#define DRM_IOCTL_VIA_FLUSH DRM_IO( DRM_COMMAND_BASE + DRM_VIA_FLUSH) -#define DRM_IOCTL_VIA_PCICMD DRM_IOW( DRM_COMMAND_BASE + DRM_VIA_PCICMD, drm_via_cmdbuffer_t) -#define DRM_IOCTL_VIA_CMDBUF_SIZE DRM_IOWR( DRM_COMMAND_BASE + DRM_VIA_CMDBUF_SIZE, \ - drm_via_cmdbuf_size_t) -#define DRM_IOCTL_VIA_WAIT_IRQ DRM_IOWR( DRM_COMMAND_BASE + DRM_VIA_WAIT_IRQ, drm_via_irqwait_t) -#define DRM_IOCTL_VIA_DMA_BLIT DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_DMA_BLIT, drm_via_dmablit_t) -#define DRM_IOCTL_VIA_BLIT_SYNC DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_BLIT_SYNC, drm_via_blitsync_t) - -/* Indices into buf.Setup where various bits of state are mirrored per - * context and per buffer. These can be fired at the card as a unit, - * or in a piecewise fashion as required. 
- */ - -#define VIA_TEX_SETUP_SIZE 8 - -/* Flags for clear ioctl - */ -#define VIA_FRONT 0x1 -#define VIA_BACK 0x2 -#define VIA_DEPTH 0x4 -#define VIA_STENCIL 0x8 -#define VIA_MEM_VIDEO 0 /* matches drm constant */ -#define VIA_MEM_AGP 1 /* matches drm constant */ -#define VIA_MEM_SYSTEM 2 -#define VIA_MEM_MIXED 3 -#define VIA_MEM_UNKNOWN 4 - -typedef struct { - __u32 offset; - __u32 size; -} drm_via_agp_t; - -typedef struct { - __u32 offset; - __u32 size; -} drm_via_fb_t; - -typedef struct { - __u32 context; - __u32 type; - __u32 size; - unsigned long index; - unsigned long offset; -} drm_via_mem_t; - -typedef struct _drm_via_init { - enum { - VIA_INIT_MAP = 0x01, - VIA_CLEANUP_MAP = 0x02 - } func; - - unsigned long sarea_priv_offset; - unsigned long fb_offset; - unsigned long mmio_offset; - unsigned long agpAddr; -} drm_via_init_t; - -typedef struct _drm_via_futex { - enum { - VIA_FUTEX_WAIT = 0x00, - VIA_FUTEX_WAKE = 0X01 - } func; - __u32 ms; - __u32 lock; - __u32 val; -} drm_via_futex_t; - -typedef struct _drm_via_dma_init { - enum { - VIA_INIT_DMA = 0x01, - VIA_CLEANUP_DMA = 0x02, - VIA_DMA_INITIALIZED = 0x03 - } func; - - unsigned long offset; - unsigned long size; - unsigned long reg_pause_addr; -} drm_via_dma_init_t; - -typedef struct _drm_via_cmdbuffer { - char __user *buf; - unsigned long size; -} drm_via_cmdbuffer_t; - -/* Warning: If you change the SAREA structure you must change the Xserver - * structure as well */ - -typedef struct _drm_via_tex_region { - unsigned char next, prev; /* indices to form a circular LRU */ - unsigned char inUse; /* owned by a client, or free? 
*/ - int age; /* tracked by clients to update local LRU's */ -} drm_via_tex_region_t; - -typedef struct _drm_via_sarea { - unsigned int dirty; - unsigned int nbox; - struct drm_clip_rect boxes[VIA_NR_SAREA_CLIPRECTS]; - drm_via_tex_region_t texList[VIA_NR_TEX_REGIONS + 1]; - int texAge; /* last time texture was uploaded */ - int ctxOwner; /* last context to upload state */ - int vertexPrim; - - /* - * Below is for XvMC. - * We want the lock integers alone on, and aligned to, a cache line. - * Therefore this somewhat strange construct. - */ - - char XvMCLockArea[VIA_MAX_CACHELINE_SIZE * (VIA_NR_XVMC_LOCKS + 1)]; - - unsigned int XvMCDisplaying[VIA_NR_XVMC_PORTS]; - unsigned int XvMCSubPicOn[VIA_NR_XVMC_PORTS]; - unsigned int XvMCCtxNoGrabbed; /* Last context to hold decoder */ - - /* Used by the 3d driver only at this point, for pageflipping: - */ - unsigned int pfCurrentOffset; -} drm_via_sarea_t; - -typedef struct _drm_via_cmdbuf_size { - enum { - VIA_CMDBUF_SPACE = 0x01, - VIA_CMDBUF_LAG = 0x02 - } func; - int wait; - __u32 size; -} drm_via_cmdbuf_size_t; - -typedef enum { - VIA_IRQ_ABSOLUTE = 0x0, - VIA_IRQ_RELATIVE = 0x1, - VIA_IRQ_SIGNAL = 0x10000000, - VIA_IRQ_FORCE_SEQUENCE = 0x20000000 -} via_irq_seq_type_t; - -#define VIA_IRQ_FLAGS_MASK 0xF0000000 - -enum drm_via_irqs { - drm_via_irq_hqv0 = 0, - drm_via_irq_hqv1, - drm_via_irq_dma0_dd, - drm_via_irq_dma0_td, - drm_via_irq_dma1_dd, - drm_via_irq_dma1_td, - drm_via_irq_num -}; - -struct drm_via_wait_irq_request { - unsigned irq; - via_irq_seq_type_t type; - __u32 sequence; - __u32 signal; -}; - -typedef union drm_via_irqwait { - struct drm_via_wait_irq_request request; - struct drm_wait_vblank_reply reply; -} drm_via_irqwait_t; - -typedef struct drm_via_blitsync { - __u32 sync_handle; - unsigned engine; -} drm_via_blitsync_t; - -/* - * Below,"flags" is currently unused but will be used for possible future - * extensions like kernel space bounce buffers for bad alignments and - * blit engine busy-wait polling 
for better latency in the absence of - * interrupts. - */ - -typedef struct drm_via_dmablit { - __u32 num_lines; - __u32 line_length; - - __u32 fb_addr; - __u32 fb_stride; - - unsigned char *mem_addr; - __u32 mem_stride; - - __u32 flags; - int to_fb; - - drm_via_blitsync_t sync; -} drm_via_dmablit_t; - -#if defined(__cplusplus) -} -#endif - -#endif /* _VIA_DRM_H_ */ -- cgit v1.2.3 From b1bf64f8b92a084e5c188446e62ea5ccaa9f005d Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 6 Dec 2022 11:21:23 -0800 Subject: drm/msm: Add MSM_SUBMIT_BO_NO_IMPLICIT In cases where implicit sync is used, it is still useful (for things like sub-allocation, etc) to allow userspace to opt-out of implicit sync on per-BO basis. Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/514216/ Link: https://lore.kernel.org/r/20221206192123.661448-1-robdclark@gmail.com --- include/uapi/drm/msm_drm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index f54b48ef6a2d..329100016e7c 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -222,10 +222,12 @@ struct drm_msm_gem_submit_cmd { #define MSM_SUBMIT_BO_READ 0x0001 #define MSM_SUBMIT_BO_WRITE 0x0002 #define MSM_SUBMIT_BO_DUMP 0x0004 +#define MSM_SUBMIT_BO_NO_IMPLICIT 0x0008 #define MSM_SUBMIT_BO_FLAGS (MSM_SUBMIT_BO_READ | \ MSM_SUBMIT_BO_WRITE | \ - MSM_SUBMIT_BO_DUMP) + MSM_SUBMIT_BO_DUMP | \ + MSM_SUBMIT_BO_NO_IMPLICIT) struct drm_msm_gem_submit_bo { __u32 flags; /* in, mask of MSM_SUBMIT_BO_x */ -- cgit v1.2.3 From 35b137630f08d913fc2e33df33ccc2570dff3f7d Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 17 Jan 2023 10:27:17 +0100 Subject: accel/ivpu: Introduce a new DRM driver for Intel VPU VPU stands for Versatile Processing Unit and it's a CPU-integrated inference accelerator for Computer Vision and Deep Learning applications. 
The VPU device consists of the following components: - Buttress - provides CPU to VPU integration, interrupt, frequency and power management. - Memory Management Unit (based on ARM MMU-600) - translates VPU to host DMA addresses, isolates user workloads. - RISC based microcontroller - executes firmware that provides job execution API for the kernel-mode driver - Neural Compute Subsystem (NCS) - does the actual work, provides Compute and Copy engines. - Network on Chip (NoC) - network fabric connecting all the components This driver supports VPU IP v2.7 integrated into Intel Meteor Lake client CPUs (14th generation). Module sources are at drivers/accel/ivpu and module name is "intel_vpu.ko". This patch includes only very basic functionality: - module, PCI device and IRQ initialization - register definitions and low level register manipulation functions - SET/GET_PARAM ioctls - power up without firmware Co-developed-by: Krystian Pradzynski Signed-off-by: Krystian Pradzynski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Oded Gabbay Reviewed-by: Jeffrey Hugo Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-2-jacek.lawrynowicz@linux.intel.com --- include/uapi/drm/ivpu_accel.h | 95 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 include/uapi/drm/ivpu_accel.h (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h new file mode 100644 index 000000000000..c6a98977eb8e --- /dev/null +++ b/include/uapi/drm/ivpu_accel.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __UAPI_IVPU_DRM_H__ +#define __UAPI_IVPU_DRM_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_IVPU_DRIVER_MAJOR 1 +#define DRM_IVPU_DRIVER_MINOR 0 + +#define DRM_IVPU_GET_PARAM 0x00 +#define DRM_IVPU_SET_PARAM 0x01 + +#define
DRM_IOCTL_IVPU_GET_PARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param) + +#define DRM_IOCTL_IVPU_SET_PARAM \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SET_PARAM, struct drm_ivpu_param) + +/** + * DOC: contexts + * + * VPU contexts have private virtual address space, job queues and priority. + * Each context is identified by an unique ID. Context is created on open(). + */ + +#define DRM_IVPU_PARAM_DEVICE_ID 0 +#define DRM_IVPU_PARAM_DEVICE_REVISION 1 +#define DRM_IVPU_PARAM_PLATFORM_TYPE 2 +#define DRM_IVPU_PARAM_CORE_CLOCK_RATE 3 +#define DRM_IVPU_PARAM_NUM_CONTEXTS 4 +#define DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS 5 +#define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6 + +#define DRM_IVPU_PLATFORM_TYPE_SILICON 0 + +#define DRM_IVPU_CONTEXT_PRIORITY_IDLE 0 +#define DRM_IVPU_CONTEXT_PRIORITY_NORMAL 1 +#define DRM_IVPU_CONTEXT_PRIORITY_FOCUS 2 +#define DRM_IVPU_CONTEXT_PRIORITY_REALTIME 3 + +/** + * struct drm_ivpu_param - Get/Set VPU parameters + */ +struct drm_ivpu_param { + /** + * @param: + * + * Supported params: + * + * %DRM_IVPU_PARAM_DEVICE_ID: + * PCI Device ID of the VPU device (read-only) + * + * %DRM_IVPU_PARAM_DEVICE_REVISION: + * VPU device revision (read-only) + * + * %DRM_IVPU_PARAM_PLATFORM_TYPE: + * Returns %DRM_IVPU_PLATFORM_TYPE_SILICON on real hardware or device specific + * platform type when executing on a simulator or emulator (read-only) + * + * %DRM_IVPU_PARAM_CORE_CLOCK_RATE: + * Current PLL frequency (read-only) + * + * %DRM_IVPU_PARAM_NUM_CONTEXTS: + * Maximum number of simultaneously existing contexts (read-only) + * + * %DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS: + * Lowest VPU virtual address available in the current context (read-only) + * + * %DRM_IVPU_PARAM_CONTEXT_PRIORITY: + * Value of current context scheduling priority (read-write). + * See DRM_IVPU_CONTEXT_PRIORITY_* for possible values. 
+ * + */ + __u32 param; + + /** @index: Index for params that have multiple instances */ + __u32 index; + + /** @value: Param value */ + __u64 value; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* __UAPI_IVPU_DRM_H__ */ -- cgit v1.2.3 From 263b2ba5fc93c875129e0d2b4034d7d8a34b3d39 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 17 Jan 2023 10:27:18 +0100 Subject: accel/ivpu: Add Intel VPU MMU support VPU Memory Management Unit is based on ARM MMU-600. It allows the creation of multiple virtual address spaces for the device and map noncontinuous host memory (there is no dedicated memory on the VPU). Address space is implemented as a struct ivpu_mmu_context, it has an ID, drm_mm allocator for VPU addresses and struct ivpu_mmu_pgtable that holds actual 3-level, 4KB page table. Context with ID 0 (global context) is created upon driver initialization and it's mainly used for mapping memory required to execute the firmware. Contexts with non-zero IDs are user contexts allocated each time the device is open()-ed and they map command buffers and other workload-related memory. Workloads executing in a given context have access only to the memory mapped in this context.
This patch has two main files: - ivpu_mmu_context.c handles MMU page tables and memory mapping - ivpu_mmu.c implements a driver that programs the MMU device Co-developed-by: Karol Wachowski Signed-off-by: Karol Wachowski Co-developed-by: Krystian Pradzynski Signed-off-by: Krystian Pradzynski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Oded Gabbay Reviewed-by: Jeffrey Hugo Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-3-jacek.lawrynowicz@linux.intel.com --- include/uapi/drm/ivpu_accel.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index c6a98977eb8e..543347df51a1 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -38,6 +38,7 @@ extern "C" { #define DRM_IVPU_PARAM_NUM_CONTEXTS 4 #define DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS 5 #define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6 +#define DRM_IVPU_PARAM_CONTEXT_ID 7 #define DRM_IVPU_PLATFORM_TYPE_SILICON 0 @@ -78,6 +79,9 @@ struct drm_ivpu_param { * Value of current context scheduling priority (read-write). * See DRM_IVPU_CONTEXT_PRIORITY_* for possible values. * + * %DRM_IVPU_PARAM_CONTEXT_ID: + * Current context ID, always greater than 0 (read-only) + * */ __u32 param; -- cgit v1.2.3 From 647371a6609ddf8700fe151af72e32daebb9baa7 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 17 Jan 2023 10:27:19 +0100 Subject: accel/ivpu: Add GEM buffer object management Adds three types of GEM-based BOs for the VPU: - shmem - internal - prime All types are implemented as struct ivpu_bo, based on struct drm_gem_object. VPU address is allocated when buffer is created except for imported prime buffers that allocate it in BO_INFO IOCTL due to missing file_priv arg in gem_prime_import callback. Internal buffers are pinned on creation, the rest of the buffer types can be pinned on demand (in SUBMIT IOCTL).
Buffer VPU address, allocated pages and mappings are released when the buffer is destroyed. Eviction mechanism is planned for future versions. Add two new IOCTLs: BO_CREATE, BO_INFO Signed-off-by: Jacek Lawrynowicz Reviewed-by: Oded Gabbay Reviewed-by: Jeffrey Hugo Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-4-jacek.lawrynowicz@linux.intel.com --- include/uapi/drm/ivpu_accel.h | 94 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 543347df51a1..093b83c5697e 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -17,6 +17,8 @@ extern "C" { #define DRM_IVPU_GET_PARAM 0x00 #define DRM_IVPU_SET_PARAM 0x01 +#define DRM_IVPU_BO_CREATE 0x02 +#define DRM_IVPU_BO_INFO 0x03 #define DRM_IOCTL_IVPU_GET_PARAM \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param) @@ -24,6 +26,12 @@ extern "C" { #define DRM_IOCTL_IVPU_SET_PARAM \ DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SET_PARAM, struct drm_ivpu_param) +#define DRM_IOCTL_IVPU_BO_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_CREATE, struct drm_ivpu_bo_create) + +#define DRM_IOCTL_IVPU_BO_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_INFO, struct drm_ivpu_bo_info) + /** * DOC: contexts * @@ -92,6 +100,92 @@ struct drm_ivpu_param { __u64 value; }; +#define DRM_IVPU_BO_HIGH_MEM 0x00000001 +#define DRM_IVPU_BO_MAPPABLE 0x00000002 + +#define DRM_IVPU_BO_CACHED 0x00000000 +#define DRM_IVPU_BO_UNCACHED 0x00010000 +#define DRM_IVPU_BO_WC 0x00020000 +#define DRM_IVPU_BO_CACHE_MASK 0x00030000 + +#define DRM_IVPU_BO_FLAGS \ + (DRM_IVPU_BO_HIGH_MEM | \ + DRM_IVPU_BO_MAPPABLE | \ + DRM_IVPU_BO_CACHE_MASK) + +/** + * struct drm_ivpu_bo_create - Create BO backed by SHMEM + * + * Create GEM buffer object allocated in SHMEM memory. 
+ */ +struct drm_ivpu_bo_create { + /** @size: The size in bytes of the allocated memory */ + __u64 size; + + /** + * @flags: + * + * Supported flags: + * + * %DRM_IVPU_BO_HIGH_MEM: + * + * Allocate VPU address from >4GB range. + * Buffer object with vpu address >4GB can be always accessed by the + * VPU DMA engine, but some HW generations may not be able to access + * this memory from the firmware running on the VPU management processor. + * Suitable for input, output and some scratch buffers. + * + * %DRM_IVPU_BO_MAPPABLE: + * + * Buffer object can be mapped using mmap(). + * + * %DRM_IVPU_BO_CACHED: + * + * Allocated BO will be cached on host side (WB) and snooped on the VPU side. + * This is the default caching mode. + * + * %DRM_IVPU_BO_UNCACHED: + * + * Allocated BO will not be cached on host side nor snooped on the VPU side. + * + * %DRM_IVPU_BO_WC: + * + * Allocated BO will use write combining buffer for writes but reads will be + * uncached. + */ + __u32 flags; + + /** @handle: Returned GEM object handle */ + __u32 handle; + + /** @vpu_addr: Returned VPU virtual address */ + __u64 vpu_addr; +}; + +/** + * struct drm_ivpu_bo_info - Query buffer object info + */ +struct drm_ivpu_bo_info { + /** @handle: Handle of the queried BO */ + __u32 handle; + + /** @flags: Returned flags used to create the BO */ + __u32 flags; + + /** @vpu_addr: Returned VPU virtual address */ + __u64 vpu_addr; + + /** + * @mmap_offset: + * + * Returned offset to be used in mmap(). 0 in case the BO is not mappable. + */ + __u64 mmap_offset; + + /** @size: Returned GEM object size, aligned to PAGE_SIZE */ + __u64 size; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 02d5b0aacd0590dbaf25f35834631e5bc11002e3 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 17 Jan 2023 10:27:21 +0100 Subject: accel/ivpu: Implement firmware parsing and booting Read, parse and boot VPU firmware image.
Co-developed-by: Andrzej Kacprowski Signed-off-by: Andrzej Kacprowski Co-developed-by: Krystian Pradzynski Signed-off-by: Krystian Pradzynski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Oded Gabbay Reviewed-by: Jeffrey Hugo Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-6-jacek.lawrynowicz@linux.intel.com --- include/uapi/drm/ivpu_accel.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 093b83c5697e..f05f5e38ea6d 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -47,6 +47,11 @@ extern "C" { #define DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS 5 #define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6 #define DRM_IVPU_PARAM_CONTEXT_ID 7 +#define DRM_IVPU_PARAM_FW_API_VERSION 8 +#define DRM_IVPU_PARAM_ENGINE_HEARTBEAT 9 +#define DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID 10 +#define DRM_IVPU_PARAM_TILE_CONFIG 11 +#define DRM_IVPU_PARAM_SKU 12 #define DRM_IVPU_PLATFORM_TYPE_SILICON 0 @@ -90,6 +95,22 @@ struct drm_ivpu_param { * %DRM_IVPU_PARAM_CONTEXT_ID: * Current context ID, always greater than 0 (read-only) * + * %DRM_IVPU_PARAM_FW_API_VERSION: + * Firmware API version array (read-only) + * + * %DRM_IVPU_PARAM_ENGINE_HEARTBEAT: + * Heartbeat value from an engine (read-only). + * Engine ID (i.e. DRM_IVPU_ENGINE_COMPUTE) is given via index. + * + * %DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID: + * Device-unique inference ID (read-only) + * + * %DRM_IVPU_PARAM_TILE_CONFIG: + * VPU tile configuration (read-only) + * + * %DRM_IVPU_PARAM_SKU: + * VPU SKU ID (read-only) + * */ __u32 param; -- cgit v1.2.3 From cd7272215c44676dba236491941c6c406701cc5e Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 17 Jan 2023 10:27:22 +0100 Subject: accel/ivpu: Add command buffer submission logic Each of the user contexts has two command queues, one for compute engine and one for the copy engine. 
Command queues are allocated and registered in the device when the first job (command buffer) is submitted from the user space to the VPU device. The userspace provides a list of GEM buffer object handles to submit to the VPU, the driver resolves buffer handles, pins physical memory if needed, increments ref count for each buffer and stores pointers to buffer objects in the ivpu_job objects that track jobs submitted to the device. The VPU signals job completion with an asynchronous message that contains the job id passed to firmware when the job was submitted. Currently, the driver supports simple scheduling logic where jobs submitted from user space are immediately pushed to the VPU device command queues. In the future, it will be extended to use hardware base scheduling and/or drm_sched. Co-developed-by: Andrzej Kacprowski Signed-off-by: Andrzej Kacprowski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Oded Gabbay Reviewed-by: Jeffrey Hugo Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-7-jacek.lawrynowicz@linux.intel.com --- include/uapi/drm/ivpu_accel.h | 92 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index f05f5e38ea6d..839820aed87e 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -19,6 +19,8 @@ extern "C" { #define DRM_IVPU_SET_PARAM 0x01 #define DRM_IVPU_BO_CREATE 0x02 #define DRM_IVPU_BO_INFO 0x03 +#define DRM_IVPU_SUBMIT 0x05 +#define DRM_IVPU_BO_WAIT 0x06 #define DRM_IOCTL_IVPU_GET_PARAM \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param) @@ -32,6 +34,12 @@ extern "C" { #define DRM_IOCTL_IVPU_BO_INFO \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_INFO, struct drm_ivpu_bo_info) +#define DRM_IOCTL_IVPU_SUBMIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SUBMIT, struct drm_ivpu_submit) + +#define DRM_IOCTL_IVPU_BO_WAIT \ + 
DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_WAIT, struct drm_ivpu_bo_wait) + /** * DOC: contexts * @@ -207,6 +215,90 @@ struct drm_ivpu_bo_info { __u64 size; }; +/* drm_ivpu_submit engines */ +#define DRM_IVPU_ENGINE_COMPUTE 0 +#define DRM_IVPU_ENGINE_COPY 1 + +/** + * struct drm_ivpu_submit - Submit commands to the VPU + * + * Execute a single command buffer on a given VPU engine. + * Handles to all referenced buffer objects have to be provided in @buffers_ptr. + * + * User space may wait on job completion using %DRM_IVPU_BO_WAIT ioctl. + */ +struct drm_ivpu_submit { + /** + * @buffers_ptr: + * + * A pointer to an u32 array of GEM handles of the BOs required for this job. + * The number of elements in the array must be equal to the value given by @buffer_count. + * + * The first BO is the command buffer. The rest of array has to contain all + * BOs referenced from the command buffer. + */ + __u64 buffers_ptr; + + /** @buffer_count: Number of elements in the @buffers_ptr */ + __u32 buffer_count; + + /** + * @engine: Select the engine this job should be executed on + * + * %DRM_IVPU_ENGINE_COMPUTE: + * + * Performs Deep Learning Neural Compute Inference Operations + * + * %DRM_IVPU_ENGINE_COPY: + * + * Performs memory copy operations to/from system memory allocated for VPU + */ + __u32 engine; + + /** @flags: Reserved for future use - must be zero */ + __u32 flags; + + /** + * @commands_offset: + * + * Offset inside the first buffer in @buffers_ptr containing commands + * to be executed. The offset has to be 8-byte aligned. + */ + __u32 commands_offset; +}; + +/* drm_ivpu_bo_wait job status codes */ +#define DRM_IVPU_JOB_STATUS_SUCCESS 0 + +/** + * struct drm_ivpu_bo_wait - Wait for BO to become inactive + * + * Blocks until a given buffer object becomes inactive. + * With @timeout_ns set to 0 returns immediately.
+ */ +struct drm_ivpu_bo_wait { + /** @handle: Handle to the buffer object to be waited on */ + __u32 handle; + + /** @flags: Reserved for future use - must be zero */ + __u32 flags; + + /** @timeout_ns: Absolute timeout in nanoseconds (may be zero) */ + __s64 timeout_ns; + + /** + * @job_status: + * + * Job status code which is updated after the job is completed. + * &DRM_IVPU_JOB_STATUS_SUCCESS or device specific error otherwise. + * Valid only if @handle points to a command buffer. + */ + __u32 job_status; + + /** @pad: Padding - must be zero */ + __u32 pad; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From e3e84b0a03a303421704bd3f305ca91a5226dc7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 24 Dec 2022 17:44:26 -0500 Subject: drm/amdgpu: return the PCIe gen and lanes from the INFO ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For computing PCIe bandwidth in userspace and troubleshooting PCIe bandwidth issues. Note that this intentionally fills holes and padding in drm_amdgpu_info_device. Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20790 Signed-off-by: Marek Olšák Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index fe7f871e3080..973af6d06626 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1053,7 +1053,8 @@ struct drm_amdgpu_info_device { __u32 enabled_rb_pipes_mask; __u32 num_rb_pipes; __u32 num_hw_gfx_contexts; - __u32 _pad; + /* PCIe version (the smaller of the GPU and the CPU/motherboard) */ + __u32 pcie_gen; __u64 ids_flags; /** Starting virtual address for UMDs. 
*/ __u64 virtual_address_offset; @@ -1100,7 +1101,8 @@ struct drm_amdgpu_info_device { __u32 gs_prim_buffer_depth; /* max gs wavefront per vgt*/ __u32 max_gs_waves_per_vgt; - __u32 _pad1; + /* PCIe number of lanes (the smaller of the GPU and the CPU/motherboard) */ + __u32 pcie_num_lanes; /* always on cu bitmap */ __u32 cu_ao_bitmap[4][4]; /** Starting high virtual address for UMDs. */ -- cgit v1.2.3 From 7d25cae7abf4505129f92dc581789c330640564d Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 20 Dec 2022 14:12:19 +0200 Subject: habanalabs/uapi: move uapi file to drm Move the habanalabs.h uapi file from include/uapi/misc to include/uapi/drm, and rename it to habanalabs_accel.h. This is required before moving the actual driver to the accel subsystem. Update MAINTAINERS file accordingly. Signed-off-by: Oded Gabbay --- include/uapi/drm/habanalabs_accel.h | 2225 +++++++++++++++++++++++++++++++++++ 1 file changed, 2225 insertions(+) create mode 100644 include/uapi/drm/habanalabs_accel.h (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/habanalabs_accel.h b/include/uapi/drm/habanalabs_accel.h new file mode 100644 index 000000000000..90e628779264 --- /dev/null +++ b/include/uapi/drm/habanalabs_accel.h @@ -0,0 +1,2225 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + * + * Copyright 2016-2022 HabanaLabs, Ltd. + * All Rights Reserved. 
+ * + */ + +#ifndef HABANALABS_H_ +#define HABANALABS_H_ + +#include <linux/types.h> +#include <linux/ioctl.h> + +/* + * Defines that are asic-specific but constitute an ABI between kernel driver + * and userspace + */ +#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */ +#define GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START 0x80 /* 128 bytes */ + +/* + * 128 SOBs reserved for collective wait + * 16 SOBs reserved for sync stream + */ +#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT 144 + +/* + * 64 monitors reserved for collective wait + * 8 monitors reserved for sync stream + */ +#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR 72 + +/* Max number of elements in timestamps registration buffers */ +#define TS_MAX_ELEMENTS_NUM (1 << 20) /* 1MB */ + +/* + * Goya queue Numbering + * + * The external queues (PCI DMA channels) MUST be before the internal queues + * and each group (PCI DMA channels and internal) must be contiguous inside + * itself but there can be a gap between the two groups (although not + * recommended) + */ + +enum goya_queue_id { + GOYA_QUEUE_ID_DMA_0 = 0, + GOYA_QUEUE_ID_DMA_1 = 1, + GOYA_QUEUE_ID_DMA_2 = 2, + GOYA_QUEUE_ID_DMA_3 = 3, + GOYA_QUEUE_ID_DMA_4 = 4, + GOYA_QUEUE_ID_CPU_PQ = 5, + GOYA_QUEUE_ID_MME = 6, /* Internal queues start here */ + GOYA_QUEUE_ID_TPC0 = 7, + GOYA_QUEUE_ID_TPC1 = 8, + GOYA_QUEUE_ID_TPC2 = 9, + GOYA_QUEUE_ID_TPC3 = 10, + GOYA_QUEUE_ID_TPC4 = 11, + GOYA_QUEUE_ID_TPC5 = 12, + GOYA_QUEUE_ID_TPC6 = 13, + GOYA_QUEUE_ID_TPC7 = 14, + GOYA_QUEUE_ID_SIZE +}; + +/* + * Gaudi queue Numbering + * External queues (PCI DMA channels) are DMA_0_*, DMA_1_* and DMA_5_*. + * Except one CPU queue, all the rest are internal queues.
+ */ + +enum gaudi_queue_id { + GAUDI_QUEUE_ID_DMA_0_0 = 0, /* external */ + GAUDI_QUEUE_ID_DMA_0_1 = 1, /* external */ + GAUDI_QUEUE_ID_DMA_0_2 = 2, /* external */ + GAUDI_QUEUE_ID_DMA_0_3 = 3, /* external */ + GAUDI_QUEUE_ID_DMA_1_0 = 4, /* external */ + GAUDI_QUEUE_ID_DMA_1_1 = 5, /* external */ + GAUDI_QUEUE_ID_DMA_1_2 = 6, /* external */ + GAUDI_QUEUE_ID_DMA_1_3 = 7, /* external */ + GAUDI_QUEUE_ID_CPU_PQ = 8, /* CPU */ + GAUDI_QUEUE_ID_DMA_2_0 = 9, /* internal */ + GAUDI_QUEUE_ID_DMA_2_1 = 10, /* internal */ + GAUDI_QUEUE_ID_DMA_2_2 = 11, /* internal */ + GAUDI_QUEUE_ID_DMA_2_3 = 12, /* internal */ + GAUDI_QUEUE_ID_DMA_3_0 = 13, /* internal */ + GAUDI_QUEUE_ID_DMA_3_1 = 14, /* internal */ + GAUDI_QUEUE_ID_DMA_3_2 = 15, /* internal */ + GAUDI_QUEUE_ID_DMA_3_3 = 16, /* internal */ + GAUDI_QUEUE_ID_DMA_4_0 = 17, /* internal */ + GAUDI_QUEUE_ID_DMA_4_1 = 18, /* internal */ + GAUDI_QUEUE_ID_DMA_4_2 = 19, /* internal */ + GAUDI_QUEUE_ID_DMA_4_3 = 20, /* internal */ + GAUDI_QUEUE_ID_DMA_5_0 = 21, /* internal */ + GAUDI_QUEUE_ID_DMA_5_1 = 22, /* internal */ + GAUDI_QUEUE_ID_DMA_5_2 = 23, /* internal */ + GAUDI_QUEUE_ID_DMA_5_3 = 24, /* internal */ + GAUDI_QUEUE_ID_DMA_6_0 = 25, /* internal */ + GAUDI_QUEUE_ID_DMA_6_1 = 26, /* internal */ + GAUDI_QUEUE_ID_DMA_6_2 = 27, /* internal */ + GAUDI_QUEUE_ID_DMA_6_3 = 28, /* internal */ + GAUDI_QUEUE_ID_DMA_7_0 = 29, /* internal */ + GAUDI_QUEUE_ID_DMA_7_1 = 30, /* internal */ + GAUDI_QUEUE_ID_DMA_7_2 = 31, /* internal */ + GAUDI_QUEUE_ID_DMA_7_3 = 32, /* internal */ + GAUDI_QUEUE_ID_MME_0_0 = 33, /* internal */ + GAUDI_QUEUE_ID_MME_0_1 = 34, /* internal */ + GAUDI_QUEUE_ID_MME_0_2 = 35, /* internal */ + GAUDI_QUEUE_ID_MME_0_3 = 36, /* internal */ + GAUDI_QUEUE_ID_MME_1_0 = 37, /* internal */ + GAUDI_QUEUE_ID_MME_1_1 = 38, /* internal */ + GAUDI_QUEUE_ID_MME_1_2 = 39, /* internal */ + GAUDI_QUEUE_ID_MME_1_3 = 40, /* internal */ + GAUDI_QUEUE_ID_TPC_0_0 = 41, /* internal */ + GAUDI_QUEUE_ID_TPC_0_1 = 42, /* internal */ + 
GAUDI_QUEUE_ID_TPC_0_2 = 43, /* internal */ + GAUDI_QUEUE_ID_TPC_0_3 = 44, /* internal */ + GAUDI_QUEUE_ID_TPC_1_0 = 45, /* internal */ + GAUDI_QUEUE_ID_TPC_1_1 = 46, /* internal */ + GAUDI_QUEUE_ID_TPC_1_2 = 47, /* internal */ + GAUDI_QUEUE_ID_TPC_1_3 = 48, /* internal */ + GAUDI_QUEUE_ID_TPC_2_0 = 49, /* internal */ + GAUDI_QUEUE_ID_TPC_2_1 = 50, /* internal */ + GAUDI_QUEUE_ID_TPC_2_2 = 51, /* internal */ + GAUDI_QUEUE_ID_TPC_2_3 = 52, /* internal */ + GAUDI_QUEUE_ID_TPC_3_0 = 53, /* internal */ + GAUDI_QUEUE_ID_TPC_3_1 = 54, /* internal */ + GAUDI_QUEUE_ID_TPC_3_2 = 55, /* internal */ + GAUDI_QUEUE_ID_TPC_3_3 = 56, /* internal */ + GAUDI_QUEUE_ID_TPC_4_0 = 57, /* internal */ + GAUDI_QUEUE_ID_TPC_4_1 = 58, /* internal */ + GAUDI_QUEUE_ID_TPC_4_2 = 59, /* internal */ + GAUDI_QUEUE_ID_TPC_4_3 = 60, /* internal */ + GAUDI_QUEUE_ID_TPC_5_0 = 61, /* internal */ + GAUDI_QUEUE_ID_TPC_5_1 = 62, /* internal */ + GAUDI_QUEUE_ID_TPC_5_2 = 63, /* internal */ + GAUDI_QUEUE_ID_TPC_5_3 = 64, /* internal */ + GAUDI_QUEUE_ID_TPC_6_0 = 65, /* internal */ + GAUDI_QUEUE_ID_TPC_6_1 = 66, /* internal */ + GAUDI_QUEUE_ID_TPC_6_2 = 67, /* internal */ + GAUDI_QUEUE_ID_TPC_6_3 = 68, /* internal */ + GAUDI_QUEUE_ID_TPC_7_0 = 69, /* internal */ + GAUDI_QUEUE_ID_TPC_7_1 = 70, /* internal */ + GAUDI_QUEUE_ID_TPC_7_2 = 71, /* internal */ + GAUDI_QUEUE_ID_TPC_7_3 = 72, /* internal */ + GAUDI_QUEUE_ID_NIC_0_0 = 73, /* internal */ + GAUDI_QUEUE_ID_NIC_0_1 = 74, /* internal */ + GAUDI_QUEUE_ID_NIC_0_2 = 75, /* internal */ + GAUDI_QUEUE_ID_NIC_0_3 = 76, /* internal */ + GAUDI_QUEUE_ID_NIC_1_0 = 77, /* internal */ + GAUDI_QUEUE_ID_NIC_1_1 = 78, /* internal */ + GAUDI_QUEUE_ID_NIC_1_2 = 79, /* internal */ + GAUDI_QUEUE_ID_NIC_1_3 = 80, /* internal */ + GAUDI_QUEUE_ID_NIC_2_0 = 81, /* internal */ + GAUDI_QUEUE_ID_NIC_2_1 = 82, /* internal */ + GAUDI_QUEUE_ID_NIC_2_2 = 83, /* internal */ + GAUDI_QUEUE_ID_NIC_2_3 = 84, /* internal */ + GAUDI_QUEUE_ID_NIC_3_0 = 85, /* internal */ + 
GAUDI_QUEUE_ID_NIC_3_1 = 86, /* internal */ + GAUDI_QUEUE_ID_NIC_3_2 = 87, /* internal */ + GAUDI_QUEUE_ID_NIC_3_3 = 88, /* internal */ + GAUDI_QUEUE_ID_NIC_4_0 = 89, /* internal */ + GAUDI_QUEUE_ID_NIC_4_1 = 90, /* internal */ + GAUDI_QUEUE_ID_NIC_4_2 = 91, /* internal */ + GAUDI_QUEUE_ID_NIC_4_3 = 92, /* internal */ + GAUDI_QUEUE_ID_NIC_5_0 = 93, /* internal */ + GAUDI_QUEUE_ID_NIC_5_1 = 94, /* internal */ + GAUDI_QUEUE_ID_NIC_5_2 = 95, /* internal */ + GAUDI_QUEUE_ID_NIC_5_3 = 96, /* internal */ + GAUDI_QUEUE_ID_NIC_6_0 = 97, /* internal */ + GAUDI_QUEUE_ID_NIC_6_1 = 98, /* internal */ + GAUDI_QUEUE_ID_NIC_6_2 = 99, /* internal */ + GAUDI_QUEUE_ID_NIC_6_3 = 100, /* internal */ + GAUDI_QUEUE_ID_NIC_7_0 = 101, /* internal */ + GAUDI_QUEUE_ID_NIC_7_1 = 102, /* internal */ + GAUDI_QUEUE_ID_NIC_7_2 = 103, /* internal */ + GAUDI_QUEUE_ID_NIC_7_3 = 104, /* internal */ + GAUDI_QUEUE_ID_NIC_8_0 = 105, /* internal */ + GAUDI_QUEUE_ID_NIC_8_1 = 106, /* internal */ + GAUDI_QUEUE_ID_NIC_8_2 = 107, /* internal */ + GAUDI_QUEUE_ID_NIC_8_3 = 108, /* internal */ + GAUDI_QUEUE_ID_NIC_9_0 = 109, /* internal */ + GAUDI_QUEUE_ID_NIC_9_1 = 110, /* internal */ + GAUDI_QUEUE_ID_NIC_9_2 = 111, /* internal */ + GAUDI_QUEUE_ID_NIC_9_3 = 112, /* internal */ + GAUDI_QUEUE_ID_SIZE +}; + +/* + * In GAUDI2 we have two modes of operation in regard to queues: + * 1. Legacy mode, where each QMAN exposes 4 streams to the user + * 2. F/W mode, where we use F/W to schedule the JOBS to the different queues. + * + * When in legacy mode, the user sends the queue id per JOB according to + * enum gaudi2_queue_id below. + * + * When in F/W mode, the user sends a stream id per Command Submission. 
The + * stream id is a running number from 0 up to (N-1), where N is the number + * of streams the F/W exposes and is passed to the user in + * struct hl_info_hw_ip_info + */ + +enum gaudi2_queue_id { + GAUDI2_QUEUE_ID_PDMA_0_0 = 0, + GAUDI2_QUEUE_ID_PDMA_0_1 = 1, + GAUDI2_QUEUE_ID_PDMA_0_2 = 2, + GAUDI2_QUEUE_ID_PDMA_0_3 = 3, + GAUDI2_QUEUE_ID_PDMA_1_0 = 4, + GAUDI2_QUEUE_ID_PDMA_1_1 = 5, + GAUDI2_QUEUE_ID_PDMA_1_2 = 6, + GAUDI2_QUEUE_ID_PDMA_1_3 = 7, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0 = 8, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1 = 9, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2 = 10, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3 = 11, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0 = 12, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1 = 13, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2 = 14, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3 = 15, + GAUDI2_QUEUE_ID_DCORE0_MME_0_0 = 16, + GAUDI2_QUEUE_ID_DCORE0_MME_0_1 = 17, + GAUDI2_QUEUE_ID_DCORE0_MME_0_2 = 18, + GAUDI2_QUEUE_ID_DCORE0_MME_0_3 = 19, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 = 20, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_1 = 21, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_2 = 22, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_3 = 23, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_0 = 24, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_1 = 25, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_2 = 26, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_3 = 27, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_0 = 28, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_1 = 29, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_2 = 30, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_3 = 31, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_0 = 32, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_1 = 33, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_2 = 34, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_3 = 35, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_0 = 36, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_1 = 37, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_2 = 38, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_3 = 39, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_0 = 40, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_1 = 41, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_2 = 42, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_3 = 43, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 = 44, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_1 = 45, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_2 = 46, + 
GAUDI2_QUEUE_ID_DCORE0_TPC_6_3 = 47, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0 = 48, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1 = 49, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2 = 50, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3 = 51, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0 = 52, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1 = 53, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2 = 54, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3 = 55, + GAUDI2_QUEUE_ID_DCORE1_MME_0_0 = 56, + GAUDI2_QUEUE_ID_DCORE1_MME_0_1 = 57, + GAUDI2_QUEUE_ID_DCORE1_MME_0_2 = 58, + GAUDI2_QUEUE_ID_DCORE1_MME_0_3 = 59, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 = 60, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_1 = 61, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_2 = 62, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_3 = 63, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_0 = 64, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_1 = 65, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_2 = 66, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_3 = 67, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_0 = 68, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_1 = 69, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_2 = 70, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_3 = 71, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_0 = 72, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_1 = 73, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_2 = 74, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_3 = 75, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_0 = 76, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_1 = 77, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_2 = 78, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_3 = 79, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_0 = 80, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_1 = 81, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_2 = 82, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_3 = 83, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0 = 84, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1 = 85, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2 = 86, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3 = 87, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0 = 88, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1 = 89, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2 = 90, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3 = 91, + GAUDI2_QUEUE_ID_DCORE2_MME_0_0 = 92, + GAUDI2_QUEUE_ID_DCORE2_MME_0_1 = 93, + GAUDI2_QUEUE_ID_DCORE2_MME_0_2 = 94, + GAUDI2_QUEUE_ID_DCORE2_MME_0_3 = 95, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 = 96, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_1 = 
97, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_2 = 98, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_3 = 99, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_0 = 100, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_1 = 101, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_2 = 102, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_3 = 103, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_0 = 104, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_1 = 105, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_2 = 106, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_3 = 107, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_0 = 108, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_1 = 109, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_2 = 110, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_3 = 111, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_0 = 112, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_1 = 113, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_2 = 114, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_3 = 115, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_0 = 116, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_1 = 117, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_2 = 118, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_3 = 119, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0 = 120, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1 = 121, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2 = 122, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3 = 123, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0 = 124, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1 = 125, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2 = 126, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3 = 127, + GAUDI2_QUEUE_ID_DCORE3_MME_0_0 = 128, + GAUDI2_QUEUE_ID_DCORE3_MME_0_1 = 129, + GAUDI2_QUEUE_ID_DCORE3_MME_0_2 = 130, + GAUDI2_QUEUE_ID_DCORE3_MME_0_3 = 131, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 = 132, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_1 = 133, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_2 = 134, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_3 = 135, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_0 = 136, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_1 = 137, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_2 = 138, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_3 = 139, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_0 = 140, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_1 = 141, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_2 = 142, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_3 = 143, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_0 = 144, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_1 = 145, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_2 = 146, + 
GAUDI2_QUEUE_ID_DCORE3_TPC_3_3 = 147, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_0 = 148, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_1 = 149, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_2 = 150, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_3 = 151, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_0 = 152, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_1 = 153, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_2 = 154, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_3 = 155, + GAUDI2_QUEUE_ID_NIC_0_0 = 156, + GAUDI2_QUEUE_ID_NIC_0_1 = 157, + GAUDI2_QUEUE_ID_NIC_0_2 = 158, + GAUDI2_QUEUE_ID_NIC_0_3 = 159, + GAUDI2_QUEUE_ID_NIC_1_0 = 160, + GAUDI2_QUEUE_ID_NIC_1_1 = 161, + GAUDI2_QUEUE_ID_NIC_1_2 = 162, + GAUDI2_QUEUE_ID_NIC_1_3 = 163, + GAUDI2_QUEUE_ID_NIC_2_0 = 164, + GAUDI2_QUEUE_ID_NIC_2_1 = 165, + GAUDI2_QUEUE_ID_NIC_2_2 = 166, + GAUDI2_QUEUE_ID_NIC_2_3 = 167, + GAUDI2_QUEUE_ID_NIC_3_0 = 168, + GAUDI2_QUEUE_ID_NIC_3_1 = 169, + GAUDI2_QUEUE_ID_NIC_3_2 = 170, + GAUDI2_QUEUE_ID_NIC_3_3 = 171, + GAUDI2_QUEUE_ID_NIC_4_0 = 172, + GAUDI2_QUEUE_ID_NIC_4_1 = 173, + GAUDI2_QUEUE_ID_NIC_4_2 = 174, + GAUDI2_QUEUE_ID_NIC_4_3 = 175, + GAUDI2_QUEUE_ID_NIC_5_0 = 176, + GAUDI2_QUEUE_ID_NIC_5_1 = 177, + GAUDI2_QUEUE_ID_NIC_5_2 = 178, + GAUDI2_QUEUE_ID_NIC_5_3 = 179, + GAUDI2_QUEUE_ID_NIC_6_0 = 180, + GAUDI2_QUEUE_ID_NIC_6_1 = 181, + GAUDI2_QUEUE_ID_NIC_6_2 = 182, + GAUDI2_QUEUE_ID_NIC_6_3 = 183, + GAUDI2_QUEUE_ID_NIC_7_0 = 184, + GAUDI2_QUEUE_ID_NIC_7_1 = 185, + GAUDI2_QUEUE_ID_NIC_7_2 = 186, + GAUDI2_QUEUE_ID_NIC_7_3 = 187, + GAUDI2_QUEUE_ID_NIC_8_0 = 188, + GAUDI2_QUEUE_ID_NIC_8_1 = 189, + GAUDI2_QUEUE_ID_NIC_8_2 = 190, + GAUDI2_QUEUE_ID_NIC_8_3 = 191, + GAUDI2_QUEUE_ID_NIC_9_0 = 192, + GAUDI2_QUEUE_ID_NIC_9_1 = 193, + GAUDI2_QUEUE_ID_NIC_9_2 = 194, + GAUDI2_QUEUE_ID_NIC_9_3 = 195, + GAUDI2_QUEUE_ID_NIC_10_0 = 196, + GAUDI2_QUEUE_ID_NIC_10_1 = 197, + GAUDI2_QUEUE_ID_NIC_10_2 = 198, + GAUDI2_QUEUE_ID_NIC_10_3 = 199, + GAUDI2_QUEUE_ID_NIC_11_0 = 200, + GAUDI2_QUEUE_ID_NIC_11_1 = 201, + GAUDI2_QUEUE_ID_NIC_11_2 = 202, + GAUDI2_QUEUE_ID_NIC_11_3 = 203, + GAUDI2_QUEUE_ID_NIC_12_0 = 204, + 
GAUDI2_QUEUE_ID_NIC_12_1 = 205, + GAUDI2_QUEUE_ID_NIC_12_2 = 206, + GAUDI2_QUEUE_ID_NIC_12_3 = 207, + GAUDI2_QUEUE_ID_NIC_13_0 = 208, + GAUDI2_QUEUE_ID_NIC_13_1 = 209, + GAUDI2_QUEUE_ID_NIC_13_2 = 210, + GAUDI2_QUEUE_ID_NIC_13_3 = 211, + GAUDI2_QUEUE_ID_NIC_14_0 = 212, + GAUDI2_QUEUE_ID_NIC_14_1 = 213, + GAUDI2_QUEUE_ID_NIC_14_2 = 214, + GAUDI2_QUEUE_ID_NIC_14_3 = 215, + GAUDI2_QUEUE_ID_NIC_15_0 = 216, + GAUDI2_QUEUE_ID_NIC_15_1 = 217, + GAUDI2_QUEUE_ID_NIC_15_2 = 218, + GAUDI2_QUEUE_ID_NIC_15_3 = 219, + GAUDI2_QUEUE_ID_NIC_16_0 = 220, + GAUDI2_QUEUE_ID_NIC_16_1 = 221, + GAUDI2_QUEUE_ID_NIC_16_2 = 222, + GAUDI2_QUEUE_ID_NIC_16_3 = 223, + GAUDI2_QUEUE_ID_NIC_17_0 = 224, + GAUDI2_QUEUE_ID_NIC_17_1 = 225, + GAUDI2_QUEUE_ID_NIC_17_2 = 226, + GAUDI2_QUEUE_ID_NIC_17_3 = 227, + GAUDI2_QUEUE_ID_NIC_18_0 = 228, + GAUDI2_QUEUE_ID_NIC_18_1 = 229, + GAUDI2_QUEUE_ID_NIC_18_2 = 230, + GAUDI2_QUEUE_ID_NIC_18_3 = 231, + GAUDI2_QUEUE_ID_NIC_19_0 = 232, + GAUDI2_QUEUE_ID_NIC_19_1 = 233, + GAUDI2_QUEUE_ID_NIC_19_2 = 234, + GAUDI2_QUEUE_ID_NIC_19_3 = 235, + GAUDI2_QUEUE_ID_NIC_20_0 = 236, + GAUDI2_QUEUE_ID_NIC_20_1 = 237, + GAUDI2_QUEUE_ID_NIC_20_2 = 238, + GAUDI2_QUEUE_ID_NIC_20_3 = 239, + GAUDI2_QUEUE_ID_NIC_21_0 = 240, + GAUDI2_QUEUE_ID_NIC_21_1 = 241, + GAUDI2_QUEUE_ID_NIC_21_2 = 242, + GAUDI2_QUEUE_ID_NIC_21_3 = 243, + GAUDI2_QUEUE_ID_NIC_22_0 = 244, + GAUDI2_QUEUE_ID_NIC_22_1 = 245, + GAUDI2_QUEUE_ID_NIC_22_2 = 246, + GAUDI2_QUEUE_ID_NIC_22_3 = 247, + GAUDI2_QUEUE_ID_NIC_23_0 = 248, + GAUDI2_QUEUE_ID_NIC_23_1 = 249, + GAUDI2_QUEUE_ID_NIC_23_2 = 250, + GAUDI2_QUEUE_ID_NIC_23_3 = 251, + GAUDI2_QUEUE_ID_ROT_0_0 = 252, + GAUDI2_QUEUE_ID_ROT_0_1 = 253, + GAUDI2_QUEUE_ID_ROT_0_2 = 254, + GAUDI2_QUEUE_ID_ROT_0_3 = 255, + GAUDI2_QUEUE_ID_ROT_1_0 = 256, + GAUDI2_QUEUE_ID_ROT_1_1 = 257, + GAUDI2_QUEUE_ID_ROT_1_2 = 258, + GAUDI2_QUEUE_ID_ROT_1_3 = 259, + GAUDI2_QUEUE_ID_CPU_PQ = 260, + GAUDI2_QUEUE_ID_SIZE +}; + +/* + * Engine Numbering + * + * Used in the "busy_engines_mask" field in 
`struct hl_info_hw_idle' + */ + +enum goya_engine_id { + GOYA_ENGINE_ID_DMA_0 = 0, + GOYA_ENGINE_ID_DMA_1, + GOYA_ENGINE_ID_DMA_2, + GOYA_ENGINE_ID_DMA_3, + GOYA_ENGINE_ID_DMA_4, + GOYA_ENGINE_ID_MME_0, + GOYA_ENGINE_ID_TPC_0, + GOYA_ENGINE_ID_TPC_1, + GOYA_ENGINE_ID_TPC_2, + GOYA_ENGINE_ID_TPC_3, + GOYA_ENGINE_ID_TPC_4, + GOYA_ENGINE_ID_TPC_5, + GOYA_ENGINE_ID_TPC_6, + GOYA_ENGINE_ID_TPC_7, + GOYA_ENGINE_ID_SIZE +}; + +enum gaudi_engine_id { + GAUDI_ENGINE_ID_DMA_0 = 0, + GAUDI_ENGINE_ID_DMA_1, + GAUDI_ENGINE_ID_DMA_2, + GAUDI_ENGINE_ID_DMA_3, + GAUDI_ENGINE_ID_DMA_4, + GAUDI_ENGINE_ID_DMA_5, + GAUDI_ENGINE_ID_DMA_6, + GAUDI_ENGINE_ID_DMA_7, + GAUDI_ENGINE_ID_MME_0, + GAUDI_ENGINE_ID_MME_1, + GAUDI_ENGINE_ID_MME_2, + GAUDI_ENGINE_ID_MME_3, + GAUDI_ENGINE_ID_TPC_0, + GAUDI_ENGINE_ID_TPC_1, + GAUDI_ENGINE_ID_TPC_2, + GAUDI_ENGINE_ID_TPC_3, + GAUDI_ENGINE_ID_TPC_4, + GAUDI_ENGINE_ID_TPC_5, + GAUDI_ENGINE_ID_TPC_6, + GAUDI_ENGINE_ID_TPC_7, + GAUDI_ENGINE_ID_NIC_0, + GAUDI_ENGINE_ID_NIC_1, + GAUDI_ENGINE_ID_NIC_2, + GAUDI_ENGINE_ID_NIC_3, + GAUDI_ENGINE_ID_NIC_4, + GAUDI_ENGINE_ID_NIC_5, + GAUDI_ENGINE_ID_NIC_6, + GAUDI_ENGINE_ID_NIC_7, + GAUDI_ENGINE_ID_NIC_8, + GAUDI_ENGINE_ID_NIC_9, + GAUDI_ENGINE_ID_SIZE +}; + +enum gaudi2_engine_id { + GAUDI2_DCORE0_ENGINE_ID_EDMA_0 = 0, + GAUDI2_DCORE0_ENGINE_ID_EDMA_1, + GAUDI2_DCORE0_ENGINE_ID_MME, + GAUDI2_DCORE0_ENGINE_ID_TPC_0, + GAUDI2_DCORE0_ENGINE_ID_TPC_1, + GAUDI2_DCORE0_ENGINE_ID_TPC_2, + GAUDI2_DCORE0_ENGINE_ID_TPC_3, + GAUDI2_DCORE0_ENGINE_ID_TPC_4, + GAUDI2_DCORE0_ENGINE_ID_TPC_5, + GAUDI2_DCORE0_ENGINE_ID_DEC_0, + GAUDI2_DCORE0_ENGINE_ID_DEC_1, + GAUDI2_DCORE1_ENGINE_ID_EDMA_0, + GAUDI2_DCORE1_ENGINE_ID_EDMA_1, + GAUDI2_DCORE1_ENGINE_ID_MME, + GAUDI2_DCORE1_ENGINE_ID_TPC_0, + GAUDI2_DCORE1_ENGINE_ID_TPC_1, + GAUDI2_DCORE1_ENGINE_ID_TPC_2, + GAUDI2_DCORE1_ENGINE_ID_TPC_3, + GAUDI2_DCORE1_ENGINE_ID_TPC_4, + GAUDI2_DCORE1_ENGINE_ID_TPC_5, + GAUDI2_DCORE1_ENGINE_ID_DEC_0, + GAUDI2_DCORE1_ENGINE_ID_DEC_1, + 
GAUDI2_DCORE2_ENGINE_ID_EDMA_0, + GAUDI2_DCORE2_ENGINE_ID_EDMA_1, + GAUDI2_DCORE2_ENGINE_ID_MME, + GAUDI2_DCORE2_ENGINE_ID_TPC_0, + GAUDI2_DCORE2_ENGINE_ID_TPC_1, + GAUDI2_DCORE2_ENGINE_ID_TPC_2, + GAUDI2_DCORE2_ENGINE_ID_TPC_3, + GAUDI2_DCORE2_ENGINE_ID_TPC_4, + GAUDI2_DCORE2_ENGINE_ID_TPC_5, + GAUDI2_DCORE2_ENGINE_ID_DEC_0, + GAUDI2_DCORE2_ENGINE_ID_DEC_1, + GAUDI2_DCORE3_ENGINE_ID_EDMA_0, + GAUDI2_DCORE3_ENGINE_ID_EDMA_1, + GAUDI2_DCORE3_ENGINE_ID_MME, + GAUDI2_DCORE3_ENGINE_ID_TPC_0, + GAUDI2_DCORE3_ENGINE_ID_TPC_1, + GAUDI2_DCORE3_ENGINE_ID_TPC_2, + GAUDI2_DCORE3_ENGINE_ID_TPC_3, + GAUDI2_DCORE3_ENGINE_ID_TPC_4, + GAUDI2_DCORE3_ENGINE_ID_TPC_5, + GAUDI2_DCORE3_ENGINE_ID_DEC_0, + GAUDI2_DCORE3_ENGINE_ID_DEC_1, + GAUDI2_DCORE0_ENGINE_ID_TPC_6, + GAUDI2_ENGINE_ID_PDMA_0, + GAUDI2_ENGINE_ID_PDMA_1, + GAUDI2_ENGINE_ID_ROT_0, + GAUDI2_ENGINE_ID_ROT_1, + GAUDI2_PCIE_ENGINE_ID_DEC_0, + GAUDI2_PCIE_ENGINE_ID_DEC_1, + GAUDI2_ENGINE_ID_NIC0_0, + GAUDI2_ENGINE_ID_NIC0_1, + GAUDI2_ENGINE_ID_NIC1_0, + GAUDI2_ENGINE_ID_NIC1_1, + GAUDI2_ENGINE_ID_NIC2_0, + GAUDI2_ENGINE_ID_NIC2_1, + GAUDI2_ENGINE_ID_NIC3_0, + GAUDI2_ENGINE_ID_NIC3_1, + GAUDI2_ENGINE_ID_NIC4_0, + GAUDI2_ENGINE_ID_NIC4_1, + GAUDI2_ENGINE_ID_NIC5_0, + GAUDI2_ENGINE_ID_NIC5_1, + GAUDI2_ENGINE_ID_NIC6_0, + GAUDI2_ENGINE_ID_NIC6_1, + GAUDI2_ENGINE_ID_NIC7_0, + GAUDI2_ENGINE_ID_NIC7_1, + GAUDI2_ENGINE_ID_NIC8_0, + GAUDI2_ENGINE_ID_NIC8_1, + GAUDI2_ENGINE_ID_NIC9_0, + GAUDI2_ENGINE_ID_NIC9_1, + GAUDI2_ENGINE_ID_NIC10_0, + GAUDI2_ENGINE_ID_NIC10_1, + GAUDI2_ENGINE_ID_NIC11_0, + GAUDI2_ENGINE_ID_NIC11_1, + GAUDI2_ENGINE_ID_PCIE, + GAUDI2_ENGINE_ID_PSOC, + GAUDI2_ENGINE_ID_ARC_FARM, + GAUDI2_ENGINE_ID_KDMA, + GAUDI2_ENGINE_ID_SIZE +}; + +/* + * ASIC specific PLL index + * + * Used to retrieve frequency info of different IPs via + * HL_INFO_PLL_FREQUENCY under HL_IOCTL_INFO IOCTL.
The enums need to be + * used as an index in struct hl_pll_frequency_info + */ + +enum hl_goya_pll_index { + HL_GOYA_CPU_PLL = 0, + HL_GOYA_IC_PLL, + HL_GOYA_MC_PLL, + HL_GOYA_MME_PLL, + HL_GOYA_PCI_PLL, + HL_GOYA_EMMC_PLL, + HL_GOYA_TPC_PLL, + HL_GOYA_PLL_MAX +}; + +enum hl_gaudi_pll_index { + HL_GAUDI_CPU_PLL = 0, + HL_GAUDI_PCI_PLL, + HL_GAUDI_SRAM_PLL, + HL_GAUDI_HBM_PLL, + HL_GAUDI_NIC_PLL, + HL_GAUDI_DMA_PLL, + HL_GAUDI_MESH_PLL, + HL_GAUDI_MME_PLL, + HL_GAUDI_TPC_PLL, + HL_GAUDI_IF_PLL, + HL_GAUDI_PLL_MAX +}; + +enum hl_gaudi2_pll_index { + HL_GAUDI2_CPU_PLL = 0, + HL_GAUDI2_PCI_PLL, + HL_GAUDI2_SRAM_PLL, + HL_GAUDI2_HBM_PLL, + HL_GAUDI2_NIC_PLL, + HL_GAUDI2_DMA_PLL, + HL_GAUDI2_MESH_PLL, + HL_GAUDI2_MME_PLL, + HL_GAUDI2_TPC_PLL, + HL_GAUDI2_IF_PLL, + HL_GAUDI2_VID_PLL, + HL_GAUDI2_MSS_PLL, + HL_GAUDI2_PLL_MAX +}; + +/** + * enum hl_goya_dma_direction - Direction of DMA operation inside a LIN_DMA packet that is + * submitted to the GOYA's DMA QMAN. This attribute is not relevant + * to the H/W but the kernel driver use it to parse the packet's + * addresses and patch/validate them. + * @HL_DMA_HOST_TO_DRAM: DMA operation from Host memory to GOYA's DDR. + * @HL_DMA_HOST_TO_SRAM: DMA operation from Host memory to GOYA's SRAM. + * @HL_DMA_DRAM_TO_SRAM: DMA operation from GOYA's DDR to GOYA's SRAM. + * @HL_DMA_SRAM_TO_DRAM: DMA operation from GOYA's SRAM to GOYA's DDR. + * @HL_DMA_SRAM_TO_HOST: DMA operation from GOYA's SRAM to Host memory. + * @HL_DMA_DRAM_TO_HOST: DMA operation from GOYA's DDR to Host memory. + * @HL_DMA_DRAM_TO_DRAM: DMA operation from GOYA's DDR to GOYA's DDR. + * @HL_DMA_SRAM_TO_SRAM: DMA operation from GOYA's SRAM to GOYA's SRAM. 
+ * @HL_DMA_ENUM_MAX: number of values in enum + */ +enum hl_goya_dma_direction { + HL_DMA_HOST_TO_DRAM, + HL_DMA_HOST_TO_SRAM, + HL_DMA_DRAM_TO_SRAM, + HL_DMA_SRAM_TO_DRAM, + HL_DMA_SRAM_TO_HOST, + HL_DMA_DRAM_TO_HOST, + HL_DMA_DRAM_TO_DRAM, + HL_DMA_SRAM_TO_SRAM, + HL_DMA_ENUM_MAX +}; + +/** + * enum hl_device_status - Device status information. + * @HL_DEVICE_STATUS_OPERATIONAL: Device is operational. + * @HL_DEVICE_STATUS_IN_RESET: Device is currently during reset. + * @HL_DEVICE_STATUS_MALFUNCTION: Device is unusable. + * @HL_DEVICE_STATUS_NEEDS_RESET: Device needs reset because auto reset was disabled. + * @HL_DEVICE_STATUS_IN_DEVICE_CREATION: Device is operational but its creation is still in + * progress. + * @HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: Device is currently during reset that was + * triggered because the user released the device + * @HL_DEVICE_STATUS_LAST: Last status. + */ +enum hl_device_status { + HL_DEVICE_STATUS_OPERATIONAL, + HL_DEVICE_STATUS_IN_RESET, + HL_DEVICE_STATUS_MALFUNCTION, + HL_DEVICE_STATUS_NEEDS_RESET, + HL_DEVICE_STATUS_IN_DEVICE_CREATION, + HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE, + HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE +}; + +enum hl_server_type { + HL_SERVER_TYPE_UNKNOWN = 0, + HL_SERVER_GAUDI_HLS1 = 1, + HL_SERVER_GAUDI_HLS1H = 2, + HL_SERVER_GAUDI_TYPE1 = 3, + HL_SERVER_GAUDI_TYPE2 = 4, + HL_SERVER_GAUDI2_HLS2 = 5 +}; + +/* + * Notifier event values - for the notification mechanism and the HL_INFO_GET_EVENTS command + * + * HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event + * HL_NOTIFIER_EVENT_UNDEFINED_OPCODE - Indicates undefined operation code + * HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset + * HL_NOTIFIER_EVENT_CS_TIMEOUT - Indicates CS timeout error + * HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE - Indicates device is unavailable + * HL_NOTIFIER_EVENT_USER_ENGINE_ERR - Indicates device engine in error state + * HL_NOTIFIER_EVENT_GENERAL_HW_ERR 
- Indicates device HW error + * HL_NOTIFIER_EVENT_RAZWI - Indicates razwi happened + * HL_NOTIFIER_EVENT_PAGE_FAULT - Indicates page fault happened + */ +#define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) +#define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) +#define HL_NOTIFIER_EVENT_DEVICE_RESET (1ULL << 2) +#define HL_NOTIFIER_EVENT_CS_TIMEOUT (1ULL << 3) +#define HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE (1ULL << 4) +#define HL_NOTIFIER_EVENT_USER_ENGINE_ERR (1ULL << 5) +#define HL_NOTIFIER_EVENT_GENERAL_HW_ERR (1ULL << 6) +#define HL_NOTIFIER_EVENT_RAZWI (1ULL << 7) +#define HL_NOTIFIER_EVENT_PAGE_FAULT (1ULL << 8) + +/* Opcode for management ioctl + * + * HW_IP_INFO - Receive information about different IP blocks in the + * device. + * HL_INFO_HW_EVENTS - Receive an array describing how many times each event + * occurred since the last hard reset. + * HL_INFO_DRAM_USAGE - Retrieve the dram usage inside the device and of the + * specific context. This is relevant only for devices + * where the dram is managed by the kernel driver + * HL_INFO_HW_IDLE - Retrieve information about the idle status of each + * internal engine. + * HL_INFO_DEVICE_STATUS - Retrieve the device's status. This opcode doesn't + * require an open context. + * HL_INFO_DEVICE_UTILIZATION - Retrieve the total utilization of the device + * over the last period specified by the user. + * The period can be between 100ms to 1s, in + * resolution of 100ms. The return value is a + * percentage of the utilization rate. + * HL_INFO_HW_EVENTS_AGGREGATE - Receive an array describing how many times each + * event occurred since the driver was loaded. + * HL_INFO_CLK_RATE - Retrieve the current and maximum clock rate + * of the device in MHz. The maximum clock rate is + * configurable via sysfs parameter + * HL_INFO_RESET_COUNT - Retrieve the counts of the soft and hard reset + * operations performed on the device since the last + * time the driver was loaded. 
+ * HL_INFO_TIME_SYNC - Retrieve the device's time alongside the host's time + * for synchronization. + * HL_INFO_CS_COUNTERS - Retrieve command submission counters + * HL_INFO_PCI_COUNTERS - Retrieve PCI counters + * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason + * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore + * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption + * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency + * HL_INFO_POWER - Retrieve power information + * HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls + * HL_INFO_DRAM_REPLACED_ROWS - Retrieve DRAM replaced rows info + * HL_INFO_DRAM_PENDING_ROWS - Retrieve DRAM pending rows num + * HL_INFO_LAST_ERR_OPEN_DEV_TIME - Retrieve timestamp of the last time the device was opened + * and CS timeout or razwi error occurred. + * HL_INFO_CS_TIMEOUT_EVENT - Retrieve CS timeout timestamp and its related CS sequence number. + * HL_INFO_RAZWI_EVENT - Retrieve parameters of razwi: + * Timestamp of razwi. + * The address which accessing it caused the razwi. + * Razwi initiator. + * Razwi cause, was it a page fault or MMU access error. + * HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES - Retrieve valid page sizes for device memory allocation + * HL_INFO_SECURED_ATTESTATION - Retrieve attestation report of the boot. + * HL_INFO_REGISTER_EVENTFD - Register eventfd for event notifications. + * HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd + * HL_INFO_GET_EVENTS - Retrieve the last occurred events + * HL_INFO_UNDEFINED_OPCODE_EVENT - Retrieve last undefined opcode error information. + * HL_INFO_ENGINE_STATUS - Retrieve the status of all the h/w engines in the asic. + * HL_INFO_PAGE_FAULT_EVENT - Retrieve parameters of captured page fault. + * HL_INFO_USER_MAPPINGS - Retrieve user mappings, captured after page fault event. + * HL_INFO_FW_GENERIC_REQ - Send generic request to FW. 
+ */ +#define HL_INFO_HW_IP_INFO 0 +#define HL_INFO_HW_EVENTS 1 +#define HL_INFO_DRAM_USAGE 2 +#define HL_INFO_HW_IDLE 3 +#define HL_INFO_DEVICE_STATUS 4 +#define HL_INFO_DEVICE_UTILIZATION 6 +#define HL_INFO_HW_EVENTS_AGGREGATE 7 +#define HL_INFO_CLK_RATE 8 +#define HL_INFO_RESET_COUNT 9 +#define HL_INFO_TIME_SYNC 10 +#define HL_INFO_CS_COUNTERS 11 +#define HL_INFO_PCI_COUNTERS 12 +#define HL_INFO_CLK_THROTTLE_REASON 13 +#define HL_INFO_SYNC_MANAGER 14 +#define HL_INFO_TOTAL_ENERGY 15 +#define HL_INFO_PLL_FREQUENCY 16 +#define HL_INFO_POWER 17 +#define HL_INFO_OPEN_STATS 18 +#define HL_INFO_DRAM_REPLACED_ROWS 21 +#define HL_INFO_DRAM_PENDING_ROWS 22 +#define HL_INFO_LAST_ERR_OPEN_DEV_TIME 23 +#define HL_INFO_CS_TIMEOUT_EVENT 24 +#define HL_INFO_RAZWI_EVENT 25 +#define HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES 26 +#define HL_INFO_SECURED_ATTESTATION 27 +#define HL_INFO_REGISTER_EVENTFD 28 +#define HL_INFO_UNREGISTER_EVENTFD 29 +#define HL_INFO_GET_EVENTS 30 +#define HL_INFO_UNDEFINED_OPCODE_EVENT 31 +#define HL_INFO_ENGINE_STATUS 32 +#define HL_INFO_PAGE_FAULT_EVENT 33 +#define HL_INFO_USER_MAPPINGS 34 +#define HL_INFO_FW_GENERIC_REQ 35 + +#define HL_INFO_VERSION_MAX_LEN 128 +#define HL_INFO_CARD_NAME_MAX_LEN 16 + +/* Maximum buffer size for retrieving engines status */ +#define HL_ENGINES_DATA_MAX_SIZE SZ_1M + +/** + * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC + * @sram_base_address: The first SRAM physical base address that is free to be + * used by the user. + * @dram_base_address: The first DRAM virtual or physical base address that is + * free to be used by the user. + * @dram_size: The DRAM size that is available to the user. + * @sram_size: The SRAM size that is available to the user. + * @num_of_events: The number of events that can be received from the f/w. This + * is needed so the user can know what is the size of the h/w events + * array he needs to pass to the kernel when he wants to fetch + * the event counters.
+ * @device_id: PCI device ID of the ASIC. + * @module_id: Module ID of the ASIC for mezzanine cards in servers + * (From OCP spec). + * @decoder_enabled_mask: Bit-mask that represents which decoders are enabled. + * @first_available_interrupt_id: The first available interrupt ID for the user + * to be used when it works with user interrupts. + * Relevant for Gaudi2 and later. + * @server_type: Server type that the Gaudi ASIC is currently installed in. + * The value is according to enum hl_server_type + * @cpld_version: CPLD version on the board. + * @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler + * in some ASICs. + * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant + * for Goya/Gaudi only. + * @dram_enabled: Whether the DRAM is enabled. + * @security_enabled: Whether security is enabled on device. + * @mme_master_slave_mode: Indicate whether the MME is working in master/slave + * configuration. Relevant for Greco and later. + * @cpucp_version: The CPUCP f/w version. + * @card_name: The card name as passed by the f/w. + * @tpc_enabled_mask_ext: Bit-mask that represents which TPCs are enabled. + * Relevant for Greco and later. + * @dram_page_size: The DRAM physical page size. + * @edma_enabled_mask: Bit-mask that represents which EDMAs are enabled. + * Relevant for Gaudi2 and later. + * @number_of_user_interrupts: The number of interrupts that are available to the userspace + * application to use. Relevant for Gaudi2 and later. + * @device_mem_alloc_default_page_size: default page size used in device memory allocation. + * @revision_id: PCI revision ID of the ASIC. 
+ */ +struct hl_info_hw_ip_info { + __u64 sram_base_address; + __u64 dram_base_address; + __u64 dram_size; + __u32 sram_size; + __u32 num_of_events; + __u32 device_id; + __u32 module_id; + __u32 decoder_enabled_mask; + __u16 first_available_interrupt_id; + __u16 server_type; + __u32 cpld_version; + __u32 psoc_pci_pll_nr; + __u32 psoc_pci_pll_nf; + __u32 psoc_pci_pll_od; + __u32 psoc_pci_pll_div_factor; + __u8 tpc_enabled_mask; + __u8 dram_enabled; + __u8 security_enabled; + __u8 mme_master_slave_mode; + __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN]; + __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; + __u64 tpc_enabled_mask_ext; + __u64 dram_page_size; + __u32 edma_enabled_mask; + __u16 number_of_user_interrupts; + __u16 pad2; + __u64 reserved4; + __u64 device_mem_alloc_default_page_size; + __u64 reserved5; + __u64 reserved6; + __u32 reserved7; + __u8 reserved8; + __u8 revision_id; + __u8 pad[2]; +}; + +struct hl_info_dram_usage { + __u64 dram_free_mem; + __u64 ctx_dram_mem; +}; + +#define HL_BUSY_ENGINES_MASK_EXT_SIZE 4 + +struct hl_info_hw_idle { + __u32 is_idle; + /* + * Bitmask of busy engines. + * Bits definition is according to `enum <chip>_engine_id'. + */ + __u32 busy_engines_mask; + + /* + * Extended Bitmask of busy engines. + * Bits definition is according to `enum <chip>_engine_id'.
+ */ + __u64 busy_engines_mask_ext[HL_BUSY_ENGINES_MASK_EXT_SIZE]; +}; + +struct hl_info_device_status { + __u32 status; + __u32 pad; +}; + +struct hl_info_device_utilization { + __u32 utilization; + __u32 pad; +}; + +struct hl_info_clk_rate { + __u32 cur_clk_rate_mhz; + __u32 max_clk_rate_mhz; +}; + +struct hl_info_reset_count { + __u32 hard_reset_cnt; + __u32 soft_reset_cnt; +}; + +struct hl_info_time_sync { + __u64 device_time; + __u64 host_time; +}; + +/** + * struct hl_info_pci_counters - pci counters + * @rx_throughput: PCI rx throughput KBps + * @tx_throughput: PCI tx throughput KBps + * @replay_cnt: PCI replay counter + */ +struct hl_info_pci_counters { + __u64 rx_throughput; + __u64 tx_throughput; + __u64 replay_cnt; +}; + +enum hl_clk_throttling_type { + HL_CLK_THROTTLE_TYPE_POWER, + HL_CLK_THROTTLE_TYPE_THERMAL, + HL_CLK_THROTTLE_TYPE_MAX +}; + +/* clk_throttling_reason masks */ +#define HL_CLK_THROTTLE_POWER (1 << HL_CLK_THROTTLE_TYPE_POWER) +#define HL_CLK_THROTTLE_THERMAL (1 << HL_CLK_THROTTLE_TYPE_THERMAL) + +/** + * struct hl_info_clk_throttle - clock throttling reason + * @clk_throttling_reason: each bit represents a clk throttling reason + * @clk_throttling_timestamp_us: represents CPU timestamp in microseconds of the start-event + * @clk_throttling_duration_ns: the clock throttle time in nanosec + */ +struct hl_info_clk_throttle { + __u32 clk_throttling_reason; + __u32 pad; + __u64 clk_throttling_timestamp_us[HL_CLK_THROTTLE_TYPE_MAX]; + __u64 clk_throttling_duration_ns[HL_CLK_THROTTLE_TYPE_MAX]; +}; + +/** + * struct hl_info_energy - device energy information + * @total_energy_consumption: total device energy consumption + */ +struct hl_info_energy { + __u64 total_energy_consumption; +}; + +#define HL_PLL_NUM_OUTPUTS 4 + +struct hl_pll_frequency_info { + __u16 output[HL_PLL_NUM_OUTPUTS]; +}; + +/** + * struct hl_open_stats_info - device open statistics information + * @open_counter: ever growing counter, increased on each successful dev open + * 
@last_open_period_ms: duration (ms) device was open last time + * @is_compute_ctx_active: Whether there is an active compute context executing + * @compute_ctx_in_release: true if the current compute context is being released + */ +struct hl_open_stats_info { + __u64 open_counter; + __u64 last_open_period_ms; + __u8 is_compute_ctx_active; + __u8 compute_ctx_in_release; + __u8 pad[6]; +}; + +/** + * struct hl_power_info - power information + * @power: power consumption + */ +struct hl_power_info { + __u64 power; +}; + +/** + * struct hl_info_sync_manager - sync manager information + * @first_available_sync_object: first available sob + * @first_available_monitor: first available monitor + * @first_available_cq: first available cq + */ +struct hl_info_sync_manager { + __u32 first_available_sync_object; + __u32 first_available_monitor; + __u32 first_available_cq; + __u32 reserved; +}; + +/** + * struct hl_info_cs_counters - command submission counters + * @total_out_of_mem_drop_cnt: total dropped due to memory allocation issue + * @ctx_out_of_mem_drop_cnt: context dropped due to memory allocation issue + * @total_parsing_drop_cnt: total dropped due to error in packet parsing + * @ctx_parsing_drop_cnt: context dropped due to error in packet parsing + * @total_queue_full_drop_cnt: total dropped due to queue full + * @ctx_queue_full_drop_cnt: context dropped due to queue full + * @total_device_in_reset_drop_cnt: total dropped due to device in reset + * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset + * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight + * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight + * @total_validation_drop_cnt: total dropped due to validation error + * @ctx_validation_drop_cnt: context dropped due to validation error + */ +struct hl_info_cs_counters { + __u64 total_out_of_mem_drop_cnt; + __u64 ctx_out_of_mem_drop_cnt; + __u64 total_parsing_drop_cnt; + __u64 
ctx_parsing_drop_cnt; + __u64 total_queue_full_drop_cnt; + __u64 ctx_queue_full_drop_cnt; + __u64 total_device_in_reset_drop_cnt; + __u64 ctx_device_in_reset_drop_cnt; + __u64 total_max_cs_in_flight_drop_cnt; + __u64 ctx_max_cs_in_flight_drop_cnt; + __u64 total_validation_drop_cnt; + __u64 ctx_validation_drop_cnt; +}; + +/** + * struct hl_info_last_err_open_dev_time - last error boot information. + * @timestamp: timestamp of last time the device was opened and error occurred. + */ +struct hl_info_last_err_open_dev_time { + __s64 timestamp; +}; + +/** + * struct hl_info_cs_timeout_event - last CS timeout information. + * @timestamp: timestamp when last CS timeout event occurred. + * @seq: sequence number of last CS timeout event. + */ +struct hl_info_cs_timeout_event { + __s64 timestamp; + __u64 seq; +}; + +#define HL_RAZWI_NA_ENG_ID U16_MAX +#define HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR 128 +#define HL_RAZWI_READ BIT(0) +#define HL_RAZWI_WRITE BIT(1) +#define HL_RAZWI_LBW BIT(2) +#define HL_RAZWI_HBW BIT(3) +#define HL_RAZWI_RR BIT(4) +#define HL_RAZWI_ADDR_DEC BIT(5) + +/** + * struct hl_info_razwi_event - razwi information. + * @timestamp: timestamp of razwi. + * @addr: address which accessing it caused razwi. + * @engine_id: engine id of the razwi initiator, if it was initiated by engine that does not + * have engine id it will be set to HL_RAZWI_NA_ENG_ID. If there are several possible + * engines which caused the razwi, it will hold all of them. + * @num_of_possible_engines: contains number of possible engine ids. In some asics, razwi indication + * might be common for several engines and there is no way to get the + * exact engine. In this case, engine_id array will be filled with all + * possible engines that may have caused this razwi. Also, there might be + * a possibility in gaudi, where we don't have indication on specific engine, in that case + * the value of this parameter will be zero.
+ * @flags: bitmask for additional data: HL_RAZWI_READ - razwi caused by read operation + * HL_RAZWI_WRITE - razwi caused by write operation + * HL_RAZWI_LBW - razwi caused by lbw fabric transaction + * HL_RAZWI_HBW - razwi caused by hbw fabric transaction + * HL_RAZWI_RR - razwi caused by range register + * HL_RAZWI_ADDR_DEC - razwi caused by address decode error + * Note: this data is not supported by all asics, in that case the relevant bits will not + * be set. + */ +struct hl_info_razwi_event { + __s64 timestamp; + __u64 addr; + __u16 engine_id[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR]; + __u16 num_of_possible_engines; + __u8 flags; + __u8 pad[5]; +}; + +#define MAX_QMAN_STREAMS_INFO 4 +#define OPCODE_INFO_MAX_ADDR_SIZE 8 +/** + * struct hl_info_undefined_opcode_event - info about last undefined opcode error + * @timestamp: timestamp of the undefined opcode error + * @cb_addr_streams: CB addresses (per stream) that currently exist in the PQ + * entries. In case all streams array entries are + * filled with values, it means the execution was in Lower-CP. + * @cq_addr: the address of the currently handled command buffer + * @cq_size: the size of the currently handled command buffer + * @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array. + * should be equal to 1 in case of undefined opcode + * in Upper-CP (specific stream) and equal to 4 in case + * of undefined opcode in Lower-CP. + * @engine_id: engine-id that the error occurred on + * @stream_id: the stream id the error occurred on. In case the stream equals to + * MAX_QMAN_STREAMS_INFO it means the error occurred on a Lower-CP. + */ +struct hl_info_undefined_opcode_event { + __s64 timestamp; + __u64 cb_addr_streams[MAX_QMAN_STREAMS_INFO][OPCODE_INFO_MAX_ADDR_SIZE]; + __u64 cq_addr; + __u32 cq_size; + __u32 cb_addr_streams_len; + __u32 engine_id; + __u32 stream_id; +}; + +/** + * struct hl_info_dev_memalloc_page_sizes - valid page sizes in device mem alloc information.
+ * @page_order_bitmask: bitmap in which a set bit represents the order of the supported page size + * (e.g. 0x2100000 means that 1MB and 32MB pages are supported). + */ +struct hl_info_dev_memalloc_page_sizes { + __u64 page_order_bitmask; +}; + +#define SEC_PCR_DATA_BUF_SZ 256 +#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ +#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ + +/* + * struct hl_info_sec_attest - attestation report of the boot + * @nonce: number only used once. random number provided by host. this also passed to the quote + * command as a qualifying data. + * @pcr_quote_len: length of the attestation quote data (bytes) + * @pub_data_len: length of the public data (bytes) + * @certificate_len: length of the certificate (bytes) + * @pcr_num_reg: number of PCR registers in the pcr_data array + * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes) + * @quote_sig_len: length of the attestation report signature (bytes) + * @pcr_data: raw values of the PCR registers + * @pcr_quote: attestation report data structure + * @quote_sig: signature structure of the attestation report + * @public_data: public key for the signed attestation + * (outPublic + name + qualifiedName) + * @certificate: certificate for the attestation signing key + */ +struct hl_info_sec_attest { + __u32 nonce; + __u16 pcr_quote_len; + __u16 pub_data_len; + __u16 certificate_len; + __u8 pcr_num_reg; + __u8 pcr_reg_len; + __u8 quote_sig_len; + __u8 pcr_data[SEC_PCR_DATA_BUF_SZ]; + __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ]; + __u8 quote_sig[SEC_SIGNATURE_BUF_SZ]; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; + __u8 pad0[2]; +}; + +/** + * struct hl_page_fault_info - page fault information. + * @timestamp: timestamp of page fault. 
+ * @addr: address whose access caused the page fault. + * @engine_id: engine id which caused the page fault, supported only in gaudi3. + */ +struct hl_page_fault_info { + __s64 timestamp; + __u64 addr; + __u16 engine_id; + __u8 pad[6]; +}; + +/** + * struct hl_user_mapping - user mapping information. + * @dev_va: device virtual address. + * @size: virtual address mapping size. + */ +struct hl_user_mapping { + __u64 dev_va; + __u64 size; +}; + +enum gaudi_dcores { + HL_GAUDI_WS_DCORE, + HL_GAUDI_WN_DCORE, + HL_GAUDI_EN_DCORE, + HL_GAUDI_ES_DCORE +}; + +/** + * struct hl_info_args - Main structure to retrieve device related information. + * @return_pointer: User space address of the relevant structure related to HL_INFO_* operation + * mentioned in @op. + * @return_size: Size of the structure used in @return_pointer, just like "size" in "snprintf", it + * limits how many bytes the kernel can write. For hw_events array, the size should be + * hl_info_hw_ip_info.num_of_events * sizeof(__u32). + * @op: Defines which type of information to be retrieved. Refer to HL_INFO_* for details. + * @dcore_id: DCORE id for which the information is relevant (for Gaudi refer to enum gaudi_dcores). + * @ctx_id: Context ID of the user. Currently not in use. + * @period_ms: Period value, in milliseconds, for utilization rate in range 100ms - 1000ms in 100 ms + * resolution. Currently not in use. + * @pll_index: Index as defined in hl_<asic type>_pll_index enumeration. + * @eventfd: event file descriptor for event notifications. + * @user_buffer_actual_size: Actual data size which was copied to user allocated buffer by the + * driver. It is possible for the user to allocate buffer larger than + * needed, hence updating this variable so user will know the exact amount + * of bytes copied by the kernel to the buffer. + * @sec_attest_nonce: Nonce number used for attestation report. + * @array_size: Number of array members copied to user buffer. + * Relevant for HL_INFO_USER_MAPPINGS info ioctl.
+ * @fw_sub_opcode: generic requests sub opcodes. + * @pad: Padding to 64 bit. + */ +struct hl_info_args { + __u64 return_pointer; + __u32 return_size; + __u32 op; + + union { + __u32 dcore_id; + __u32 ctx_id; + __u32 period_ms; + __u32 pll_index; + __u32 eventfd; + __u32 user_buffer_actual_size; + __u32 sec_attest_nonce; + __u32 array_size; + __u32 fw_sub_opcode; + }; + + __u32 pad; +}; + +/* Opcode to create a new command buffer */ +#define HL_CB_OP_CREATE 0 +/* Opcode to destroy previously created command buffer */ +#define HL_CB_OP_DESTROY 1 +/* Opcode to retrieve information about a command buffer */ +#define HL_CB_OP_INFO 2 + +/* 2MB minus 32 bytes for 2xMSG_PROT */ +#define HL_MAX_CB_SIZE (0x200000 - 32) + +/* Indicates whether the command buffer should be mapped to the device's MMU */ +#define HL_CB_FLAGS_MAP 0x1 + +/* Used with HL_CB_OP_INFO opcode to get the device va address for kernel mapped CB */ +#define HL_CB_FLAGS_GET_DEVICE_VA 0x2 + +struct hl_cb_in { + /* Handle of CB or 0 if we want to create one */ + __u64 cb_handle; + /* HL_CB_OP_* */ + __u32 op; + + /* Size of CB. Maximum size is HL_MAX_CB_SIZE. The minimum size that + * will be allocated, regardless of this parameter's value, is PAGE_SIZE + */ + __u32 cb_size; + + /* Context ID - Currently not in use */ + __u32 ctx_id; + /* HL_CB_FLAGS_* */ + __u32 flags; +}; + +struct hl_cb_out { + union { + /* Handle of CB */ + __u64 cb_handle; + + union { + /* Information about CB */ + struct { + /* Usage count of CB */ + __u32 usage_cnt; + __u32 pad; + }; + + /* CB mapped address to device MMU */ + __u64 device_va; + }; + }; +}; + +union hl_cb_args { + struct hl_cb_in in; + struct hl_cb_out out; +}; + +/* HL_CS_CHUNK_FLAGS_ values + * + * HL_CS_CHUNK_FLAGS_USER_ALLOC_CB: + * Indicates if the CB was allocated and mapped by userspace + * (relevant to greco and above). User allocated CB is a command buffer, + * allocated by the user, via malloc (or similar). 
After allocating the + * CB, the user invokes - “memory ioctl” to map the user memory into a + * device virtual address. The user provides this address via the + * cb_handle field. The interface provides the ability to create a + * large CBs, Which aren’t limited to “HL_MAX_CB_SIZE”. Therefore, it + * increases the PCI-DMA queues throughput. This CB allocation method + * also reduces the use of Linux DMA-able memory pool. Which are limited + * and used by other Linux sub-systems. + */ +#define HL_CS_CHUNK_FLAGS_USER_ALLOC_CB 0x1 + +/* + * This structure size must always be fixed to 64-bytes for backward + * compatibility + */ +struct hl_cs_chunk { + union { + /* Goya/Gaudi: + * For external queue, this represents a Handle of CB on the + * Host. + * For internal queue in Goya, this represents an SRAM or + * a DRAM address of the internal CB. In Gaudi, this might also + * represent a mapped host address of the CB. + * + * Greco onwards: + * For H/W queue, this represents either a Handle of CB on the + * Host, or an SRAM, a DRAM, or a mapped host address of the CB. + * + * A mapped host address is in the device address space, after + * a host address was mapped by the device MMU. + */ + __u64 cb_handle; + + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set + * This holds address of array of u64 values that contain + * signal CS sequence numbers. The wait described by + * this job will listen on all those signals + * (wait event per signal) + */ + __u64 signal_seq_arr; + + /* + * Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set + * along with HL_CS_FLAGS_ENCAP_SIGNALS. + * This is the CS sequence which has the encapsulated signals. 
+ */ + __u64 encaps_signal_seq; + }; + + /* Index of queue to put the CB on */ + __u32 queue_index; + + union { + /* + * Size of command buffer with valid packets + * Can be smaller than actual CB size + */ + __u32 cb_size; + + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set. + * Number of entries in signal_seq_arr + */ + __u32 num_signal_seq_arr; + + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set along + * with HL_CS_FLAGS_ENCAP_SIGNALS + * This sets the signals range that the user wants to wait for + * out of the whole reserved signals range. + * e.g. if the signals range is 20, and the user doesn't want + * to wait for signal 8, he sets this offset to 7, then + * he calls the API again with 9 and so on till 20. + */ + __u32 encaps_signal_offset; + }; + + /* HL_CS_CHUNK_FLAGS_* */ + __u32 cs_chunk_flags; + + /* Relevant only when HL_CS_FLAGS_COLLECTIVE_WAIT is set. + * This holds the collective engine ID. The wait described by this job + * will sync with this engine and with all NICs before completion. + */ + __u32 collective_engine_id; + + /* Align structure to 64 bytes */ + __u32 pad[10]; +}; + +/* SIGNAL/WAIT/COLLECTIVE_WAIT flags are mutually exclusive */ +#define HL_CS_FLAGS_FORCE_RESTORE 0x1 +#define HL_CS_FLAGS_SIGNAL 0x2 +#define HL_CS_FLAGS_WAIT 0x4 +#define HL_CS_FLAGS_COLLECTIVE_WAIT 0x8 + +#define HL_CS_FLAGS_TIMESTAMP 0x20 +#define HL_CS_FLAGS_STAGED_SUBMISSION 0x40 +#define HL_CS_FLAGS_STAGED_SUBMISSION_FIRST 0x80 +#define HL_CS_FLAGS_STAGED_SUBMISSION_LAST 0x100 +#define HL_CS_FLAGS_CUSTOM_TIMEOUT 0x200 +#define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT 0x400 + +/* + * The encapsulated signals CS is merged into the existing CS ioctls. + * In order to use this feature, follow the below procedure: + * 1. Reserve signals, set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY + * the output of this API will be the SOB offset from CFG_BASE.
+ * this address will be used to patch CB cmds to do the signaling for this + * SOB by incrementing its value. + * for reverting the reservation use HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY + * CS type, note that this might fail if out-of-sync happened to the SOB + * value, in case other signaling request to the same SOB occurred between + * reserve-unreserve calls. + * 2. Use the staged CS to do the encapsulated signaling jobs. + * use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST + * along with HL_CS_FLAGS_ENCAP_SIGNALS flag, and set encaps_signal_offset + * field. This offset allows app to wait on part of the reserved signals. + * 3. Use WAIT/COLLECTIVE WAIT CS along with HL_CS_FLAGS_ENCAP_SIGNALS flag + * to wait for the encapsulated signals. + */ +#define HL_CS_FLAGS_ENCAP_SIGNALS 0x800 +#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000 +#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000 + +/* + * The engine cores CS is merged into the existing CS ioctls. + * Use it to control the engine cores mode.
+ */ +#define HL_CS_FLAGS_ENGINE_CORE_COMMAND 0x4000 + +#define HL_CS_STATUS_SUCCESS 0 + +#define HL_MAX_JOBS_PER_CS 512 + +/* HL_ENGINE_CORE_ values + * + * HL_ENGINE_CORE_HALT: engine core halt + * HL_ENGINE_CORE_RUN: engine core run + */ +#define HL_ENGINE_CORE_HALT (1 << 0) +#define HL_ENGINE_CORE_RUN (1 << 1) + +struct hl_cs_in { + + union { + struct { + /* this holds address of array of hl_cs_chunk for restore phase */ + __u64 chunks_restore; + + /* holds address of array of hl_cs_chunk for execution phase */ + __u64 chunks_execute; + }; + + /* Valid only when HL_CS_FLAGS_ENGINE_CORE_COMMAND is set */ + struct { + /* this holds address of array of uint32 for engine_cores */ + __u64 engine_cores; + + /* number of engine cores in engine_cores array */ + __u32 num_engine_cores; + + /* the core command to be sent towards engine cores */ + __u32 core_command; + }; + }; + + union { + /* + * Sequence number of a staged submission CS + * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and + * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset. + */ + __u64 seq; + + /* + * Encapsulated signals handle id + * Valid for two flows: + * 1. CS with encapsulated signals: + * when HL_CS_FLAGS_STAGED_SUBMISSION and + * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST + * and HL_CS_FLAGS_ENCAP_SIGNALS are set. + * 2. unreserve signals: + * valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set. + */ + __u32 encaps_sig_handle_id; + + /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ + struct { + /* Encapsulated signals number */ + __u32 encaps_signals_count; + + /* Encapsulated signals queue index (stream) */ + __u32 encaps_signals_q_idx; + }; + }; + + /* Number of chunks in restore phase array. Maximum number is + * HL_MAX_JOBS_PER_CS + */ + __u32 num_chunks_restore; + + /* Number of chunks in execution array. 
Maximum number is + * HL_MAX_JOBS_PER_CS + */ + __u32 num_chunks_execute; + + /* timeout in seconds - valid only if HL_CS_FLAGS_CUSTOM_TIMEOUT + * is set + */ + __u32 timeout; + + /* HL_CS_FLAGS_* */ + __u32 cs_flags; + + /* Context ID - Currently not in use */ + __u32 ctx_id; + __u8 pad[4]; +}; + +struct hl_cs_out { + union { + /* + * seq holds the sequence number of the CS to pass to wait + * ioctl. All values are valid except for 0 and ULLONG_MAX + */ + __u64 seq; + + /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ + struct { + /* This is the reserved signal handle id */ + __u32 handle_id; + + /* This is the signals count */ + __u32 count; + }; + }; + + /* HL_CS_STATUS */ + __u32 status; + + /* + * SOB base address offset + * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY or HL_CS_FLAGS_SIGNAL is set + */ + __u32 sob_base_addr_offset; + + /* + * Count of completed signals in SOB before current signal submission. + * Valid only when (HL_CS_FLAGS_ENCAP_SIGNALS & HL_CS_FLAGS_STAGED_SUBMISSION) + * or HL_CS_FLAGS_SIGNAL is set + */ + __u16 sob_count_before_submission; + __u16 pad[3]; +}; + +union hl_cs_args { + struct hl_cs_in in; + struct hl_cs_out out; +}; + +#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 +#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 +#define HL_WAIT_CS_FLAGS_ANY_CQ_INTERRUPT 0xFFF00000 +#define HL_WAIT_CS_FLAGS_ANY_DEC_INTERRUPT 0xFFE00000 +#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 +#define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10 +#define HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT 0x20 + +#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32 + +struct hl_wait_cs_in { + union { + struct { + /* + * In case of wait_cs holds the CS sequence number. + * In case of wait for multi CS hold a user pointer to + * an array of CS sequence numbers + */ + __u64 seq; + /* Absolute timeout to wait for command submission + * in microseconds + */ + __u64 timeout_us; + }; + + struct { + union { + /* User address for completion comparison. 
+ * upon interrupt, driver will compare the value pointed + * by this address with the supplied target value. + * in order not to perform any comparison, set address + * to all 1s. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set + */ + __u64 addr; + + /* cq_counters_handle to a kernel mapped cb which contains + * cq counters. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set + */ + __u64 cq_counters_handle; + }; + + /* Target value for completion comparison */ + __u64 target; + }; + }; + + /* Context ID - Currently not in use */ + __u32 ctx_id; + + /* HL_WAIT_CS_FLAGS_* + * If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include + * interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK + * + * in order to wait for any CQ interrupt, set interrupt value to + * HL_WAIT_CS_FLAGS_ANY_CQ_INTERRUPT. + * + * in order to wait for any decoder interrupt, set interrupt value to + * HL_WAIT_CS_FLAGS_ANY_DEC_INTERRUPT. + */ + __u32 flags; + + union { + struct { + /* Multi CS API info- valid entries in multi-CS array */ + __u8 seq_arr_len; + __u8 pad[7]; + }; + + /* Absolute timeout to wait for an interrupt in microseconds. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set + */ + __u64 interrupt_timeout_us; + }; + + /* + * cq counter offset inside the counters cb pointed by cq_counters_handle above. + * upon interrupt, driver will compare the value pointed + * by this address (cq_counters_handle + cq_counters_offset) + * with the supplied target value. + * relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set + */ + __u64 cq_counters_offset; + + /* + * Timestamp_handle timestamps buffer handle. + * relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set + */ + __u64 timestamp_handle; + + /* + * Timestamp_offset is offset inside the timestamp buffer pointed by timestamp_handle above. + * upon interrupt, if the cq reached the target value then driver will write + * timestamp to this offset. 
+ * relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set + */ + __u64 timestamp_offset; +}; + +#define HL_WAIT_CS_STATUS_COMPLETED 0 +#define HL_WAIT_CS_STATUS_BUSY 1 +#define HL_WAIT_CS_STATUS_TIMEDOUT 2 +#define HL_WAIT_CS_STATUS_ABORTED 3 + +#define HL_WAIT_CS_STATUS_FLAG_GONE 0x1 +#define HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD 0x2 + +struct hl_wait_cs_out { + /* HL_WAIT_CS_STATUS_* */ + __u32 status; + /* HL_WAIT_CS_STATUS_FLAG* */ + __u32 flags; + /* + * valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set + * for wait_cs: timestamp of CS completion + * for wait_multi_cs: timestamp of FIRST CS completion + */ + __s64 timestamp_nsec; + /* multi CS completion bitmap */ + __u32 cs_completion_map; + __u32 pad; +}; + +union hl_wait_cs_args { + struct hl_wait_cs_in in; + struct hl_wait_cs_out out; +}; + +/* Opcode to allocate device memory */ +#define HL_MEM_OP_ALLOC 0 + +/* Opcode to free previously allocated device memory */ +#define HL_MEM_OP_FREE 1 + +/* Opcode to map host and device memory */ +#define HL_MEM_OP_MAP 2 + +/* Opcode to unmap previously mapped host and device memory */ +#define HL_MEM_OP_UNMAP 3 + +/* Opcode to map a hw block */ +#define HL_MEM_OP_MAP_BLOCK 4 + +/* Opcode to create DMA-BUF object for an existing device memory allocation + * and to export an FD of that DMA-BUF back to the caller + */ +#define HL_MEM_OP_EXPORT_DMABUF_FD 5 + +/* Opcode to create timestamps pool for user interrupts registration support + * The memory will be allocated by the kernel driver, A timestamp buffer which the user + * will get handle to it for mmap, and another internal buffer used by the + * driver for registration management + * The memory will be freed when the user closes the file descriptor(ctx close) + */ +#define HL_MEM_OP_TS_ALLOC 6 + +/* Memory flags */ +#define HL_MEM_CONTIGUOUS 0x1 +#define HL_MEM_SHARED 0x2 +#define HL_MEM_USERPTR 0x4 +#define HL_MEM_FORCE_HINT 0x8 +#define HL_MEM_PREFETCH 0x40 + +/** + * structure hl_mem_in - structure 
that handle input args for memory IOCTL + * @union arg: union of structures to be used based on the input operation + * @op: specify the requested memory operation (one of the HL_MEM_OP_* definitions). + * @flags: flags for the memory operation (one of the HL_MEM_* definitions). + * For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the DMA-BUF file/FD flags. + * @ctx_id: context ID - currently not in use. + * @num_of_elements: number of timestamp elements used only with HL_MEM_OP_TS_ALLOC opcode. + */ +struct hl_mem_in { + union { + /** + * structure for device memory allocation (used with the HL_MEM_OP_ALLOC op) + * @mem_size: memory size to allocate + * @page_size: page size to use on allocation. when the value is 0 the default page + * size will be taken. + */ + struct { + __u64 mem_size; + __u64 page_size; + } alloc; + + /** + * structure for free-ing device memory (used with the HL_MEM_OP_FREE op) + * @handle: handle returned from HL_MEM_OP_ALLOC + */ + struct { + __u64 handle; + } free; + + /** + * structure for mapping device memory (used with the HL_MEM_OP_MAP op) + * @hint_addr: requested virtual address of mapped memory. + * the driver will try to map the requested region to this hint + * address, as long as the address is valid and not already mapped. + * the user should check the returned address of the IOCTL to make + * sure he got the hint address. + * passing 0 here means that the driver will choose the address itself. + * @handle: handle returned from HL_MEM_OP_ALLOC. + */ + struct { + __u64 hint_addr; + __u64 handle; + } map_device; + + /** + * structure for mapping host memory (used with the HL_MEM_OP_MAP op) + * @host_virt_addr: address of allocated host memory. + * @hint_addr: requested virtual address of mapped memory. + * the driver will try to map the requested region to this hint + * address, as long as the address is valid and not already mapped. 
+ * the user should check the returned address of the IOCTL to make + * sure he got the hint address. + * passing 0 here means that the driver will choose the address itself. + * @mem_size: size of allocated host memory. + */ + struct { + __u64 host_virt_addr; + __u64 hint_addr; + __u64 mem_size; + } map_host; + + /** + * structure for mapping hw block (used with the HL_MEM_OP_MAP_BLOCK op) + * @block_addr: HW block address to map, a handle and size will be returned + * to the user and will be used to mmap the relevant block. + * only addresses from configuration space are allowed. + */ + struct { + __u64 block_addr; + } map_block; + + /** + * structure for unmapping host memory (used with the HL_MEM_OP_UNMAP op) + * @device_virt_addr: virtual address returned from HL_MEM_OP_MAP + */ + struct { + __u64 device_virt_addr; + } unmap; + + /** + * structure for exporting DMABUF object (used with + * the HL_MEM_OP_EXPORT_DMABUF_FD op) + * @addr: for Gaudi1, the driver expects a physical address + * inside the device's DRAM. this is because in Gaudi1 + * we don't have MMU that covers the device's DRAM. + * for all other ASICs, the driver expects a device + * virtual address that represents the start address of + * a mapped DRAM memory area inside the device. + * the address must be the same as was received from the + * driver during a previous HL_MEM_OP_MAP operation. + * @mem_size: size of memory to export. + * @offset: for Gaudi1, this value must be 0. For all other ASICs, + * the driver expects an offset inside of the memory area + * described by addr. the offset represents the start + * address that the exported dma-buf object describes. + */ + struct { + __u64 addr; + __u64 mem_size; + __u64 offset; + } export_dmabuf_fd; + }; + + __u32 op; + __u32 flags; + __u32 ctx_id; + __u32 num_of_elements; +}; + +struct hl_mem_out { + union { + /* + * Used for HL_MEM_OP_MAP as the virtual address that was + * assigned in the device VA space.
+ * A value of 0 means the requested operation failed. + */ + __u64 device_virt_addr; + + /* + * Used in HL_MEM_OP_ALLOC + * This is the assigned handle for the allocated memory + */ + __u64 handle; + + struct { + /* + * Used in HL_MEM_OP_MAP_BLOCK. + * This is the assigned handle for the mapped block + */ + __u64 block_handle; + + /* + * Used in HL_MEM_OP_MAP_BLOCK + * This is the size of the mapped block + */ + __u32 block_size; + + __u32 pad; + }; + + /* Returned in HL_MEM_OP_EXPORT_DMABUF_FD. Represents the + * DMA-BUF object that was created to describe a memory + * allocation on the device's memory space. The FD should be + * passed to the importer driver + */ + __s32 fd; + }; +}; + +union hl_mem_args { + struct hl_mem_in in; + struct hl_mem_out out; +}; + +#define HL_DEBUG_MAX_AUX_VALUES 10 + +struct hl_debug_params_etr { + /* Address in memory to allocate buffer */ + __u64 buffer_address; + + /* Size of buffer to allocate */ + __u64 buffer_size; + + /* Sink operation mode: SW fifo, HW fifo, Circular buffer */ + __u32 sink_mode; + __u32 pad; +}; + +struct hl_debug_params_etf { + /* Address in memory to allocate buffer */ + __u64 buffer_address; + + /* Size of buffer to allocate */ + __u64 buffer_size; + + /* Sink operation mode: SW fifo, HW fifo, Circular buffer */ + __u32 sink_mode; + __u32 pad; +}; + +struct hl_debug_params_stm { + /* Two bit masks for HW event and Stimulus Port */ + __u64 he_mask; + __u64 sp_mask; + + /* Trace source ID */ + __u32 id; + + /* Frequency for the timestamp register */ + __u32 frequency; +}; + +struct hl_debug_params_bmon { + /* Two address ranges that the user can request to filter */ + __u64 start_addr0; + __u64 addr_mask0; + + __u64 start_addr1; + __u64 addr_mask1; + + /* Capture window configuration */ + __u32 bw_win; + __u32 win_capture; + + /* Trace source ID */ + __u32 id; + + /* Control register */ + __u32 control; + + /* Two more address ranges that the user can request to filter */ + __u64 start_addr2; + __u64 
end_addr2; + + __u64 start_addr3; + __u64 end_addr3; +}; + +struct hl_debug_params_spmu { + /* Event types selection */ + __u64 event_types[HL_DEBUG_MAX_AUX_VALUES]; + + /* Number of event types selection */ + __u32 event_types_num; + + /* TRC configuration register values */ + __u32 pmtrc_val; + __u32 trc_ctrl_host_val; + __u32 trc_en_host_val; +}; + +/* Opcode for ETR component */ +#define HL_DEBUG_OP_ETR 0 +/* Opcode for ETF component */ +#define HL_DEBUG_OP_ETF 1 +/* Opcode for STM component */ +#define HL_DEBUG_OP_STM 2 +/* Opcode for FUNNEL component */ +#define HL_DEBUG_OP_FUNNEL 3 +/* Opcode for BMON component */ +#define HL_DEBUG_OP_BMON 4 +/* Opcode for SPMU component */ +#define HL_DEBUG_OP_SPMU 5 +/* Opcode for timestamp (deprecated) */ +#define HL_DEBUG_OP_TIMESTAMP 6 +/* Opcode for setting the device into or out of debug mode. The enable + * variable should be 1 for enabling debug mode and 0 for disabling it + */ +#define HL_DEBUG_OP_SET_MODE 7 + +struct hl_debug_args { + /* + * Pointer to user input structure. + * This field is relevant to specific opcodes. + */ + __u64 input_ptr; + /* Pointer to user output structure */ + __u64 output_ptr; + /* Size of user input structure */ + __u32 input_size; + /* Size of user output structure */ + __u32 output_size; + /* HL_DEBUG_OP_* */ + __u32 op; + /* + * Register index in the component, taken from the debug_regs_index enum + * in the various ASIC header files + */ + __u32 reg_idx; + /* Enable/disable */ + __u32 enable; + /* Context ID - Currently not in use */ + __u32 ctx_id; +}; + +/* + * Various information operations such as: + * - H/W IP information + * - Current dram usage + * + * The user calls this IOCTL with an opcode that describes the required + * information. The user should supply a pointer to a user-allocated memory + * chunk, which will be filled by the driver with the requested information. 
+ * + * The user supplies the maximum size to copy into the user's memory, + * in order to prevent data corruption in case of differences between the + * definitions of structures in kernel and userspace, e.g. in case of old + * userspace and new kernel driver + */ +#define HL_IOCTL_INFO \ + _IOWR('H', 0x01, struct hl_info_args) + +/* + * Command Buffer + * - Request a Command Buffer + * - Destroy a Command Buffer + * + * The command buffers are memory blocks that reside in DMA-able address + * space and are physically contiguous so they can be accessed by the device + * directly. They are allocated using the coherent DMA API. + * + * When creating a new CB, the IOCTL returns a handle of it, and the user-space + * process needs to use that handle to mmap the buffer so it can access them. + * + * In some instances, the device must access the command buffer through the + * device's MMU, and thus its memory should be mapped. In these cases, user can + * indicate to the driver that such a mapping is required. + * The resulting device virtual address will be used internally by the driver, + * and won't be returned to user. + * + */ +#define HL_IOCTL_CB \ + _IOWR('H', 0x02, union hl_cb_args) + +/* + * Command Submission + * + * To submit work to the device, the user needs to call this IOCTL with a set + * of JOBS. That set of JOBS constitutes a CS object. + * Each JOB will be enqueued on a specific queue, according to the user's input. + * There can be more than one JOB per queue. + * + * The CS IOCTL will receive two sets of JOBS. One set is for "restore" phase + * and a second set is for "execution" phase. + * The JOBS on the "restore" phase are enqueued only after context-switch + * (or if it's the first CS for this context). The user can also order the + * driver to run the "restore" phase explicitly + * + * Goya/Gaudi: + * There are two types of queues - external and internal. External queues + * are DMA queues which transfer data from/to the Host.
All other queues are + * internal. The driver will get completion notifications from the device only + * on JOBS which are enqueued in the external queues. + * + * Greco onwards: + * There is a single type of queue for all types of engines, either DMA engines + * for transfers from/to the host or inside the device, or compute engines. + * The driver will get completion notifications from the device for all queues. + * + * For jobs on external queues, the user needs to create command buffers + * through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on + * internal queues, the user needs to prepare a "command buffer" with packets + * on either the device SRAM/DRAM or the host, and give the device address of + * that buffer to the CS ioctl. + * For jobs on H/W queues both options of command buffers are valid. + * + * This IOCTL is asynchronous in regard to the actual execution of the CS. This + * means it returns immediately after ALL the JOBS were enqueued on their + * relevant queues. Therefore, the user mustn't assume the CS has been completed + * or has even started to execute. + * + * Upon successful enqueue, the IOCTL returns a sequence number which the user + * can use with the "Wait for CS" IOCTL to check whether the handle's CS + * non-internal JOBS have been completed. Note that if the CS has internal JOBS + * which can execute AFTER the external JOBS have finished, the driver might + * report that the CS has finished executing BEFORE the internal JOBS have + * actually finished executing. + * + * Even though the sequence number increments per CS, the user can NOT + * automatically assume that if CS with sequence number N finished, then CS + * with sequence number N-1 also finished. The user can make this assumption if + * and only if CS N and CS N-1 are exactly the same (same CBs for the same + * queues). 
+ */ +#define HL_IOCTL_CS \ + _IOWR('H', 0x03, union hl_cs_args) + +/* + * Wait for Command Submission + * + * The user can call this IOCTL with a handle it received from the CS IOCTL + * to wait until the handle's CS has finished executing. The user will wait + * inside the kernel until the CS has finished or until the user-requested + * timeout has expired. + * + * If the timeout value is 0, the driver won't sleep at all. It will check + * the status of the CS and return immediately + * + * The return value of the IOCTL is a standard Linux error code. The possible + * values are: + * + * EINTR - Kernel waiting has been interrupted, e.g. due to OS signal + * that the user process received + * ETIMEDOUT - The CS has caused a timeout on the device + * EIO - The CS was aborted (usually because the device was reset) + * ENODEV - The device wants to do a hard-reset (so the user needs to close the FD) + * + * The driver also returns a custom define in case the IOCTL call returned 0. + * The define can be one of the following: + * + * HL_WAIT_CS_STATUS_COMPLETED - The CS has been completed successfully (0) + * HL_WAIT_CS_STATUS_BUSY - The CS is still executing (0) + * HL_WAIT_CS_STATUS_TIMEDOUT - The CS has caused a timeout on the device + * (ETIMEDOUT) + * HL_WAIT_CS_STATUS_ABORTED - The CS was aborted, usually because the + * device was reset (EIO) + */ + +#define HL_IOCTL_WAIT_CS \ + _IOWR('H', 0x04, union hl_wait_cs_args) + +/* + * Memory + * - Map host memory to device MMU + * - Unmap host memory from device MMU + * + * This IOCTL allows the user to map host memory to the device MMU + * + * For host memory, the IOCTL doesn't allocate memory. The user is supposed + * to allocate the memory in user-space (malloc/new). The driver pins the + * physical pages (up to the allowed limit by the OS), assigns a virtual + * address in the device VA space and initializes the device MMU. + * + * There is an option for the user to specify the requested virtual address.
+ * + */ +#define HL_IOCTL_MEMORY \ + _IOWR('H', 0x05, union hl_mem_args) + +/* + * Debug + * - Enable/disable the ETR/ETF/FUNNEL/STM/BMON/SPMU debug traces + * + * This IOCTL allows the user to get debug traces from the chip. + * + * Before the user can send configuration requests of the various + * debug/profile engines, it needs to set the device into debug mode. + * This is because the debug/profile infrastructure is shared component in the + * device and we can't allow multiple users to access it at the same time. + * + * Once a user set the device into debug mode, the driver won't allow other + * users to "work" with the device, i.e. open a FD. If there are multiple users + * opened on the device, the driver won't allow any user to debug the device. + * + * For each configuration request, the user needs to provide the register index + * and essential data such as buffer address and size. + * + * Once the user has finished using the debug/profile engines, he should + * set the device into non-debug mode, i.e. disable debug mode. + * + * The driver can decide to "kick out" the user if he abuses this interface. + * + */ +#define HL_IOCTL_DEBUG \ + _IOWR('H', 0x06, struct hl_debug_args) + +#define HL_COMMAND_START 0x01 +#define HL_COMMAND_END 0x07 + +#endif /* HABANALABS_H_ */ -- cgit v1.2.3 From 20faaeec3794661c1d73578316691174a5a0e5a9 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Sun, 18 Dec 2022 09:42:34 +0200 Subject: habanalabs: add uapi to flush inbound HBM transactions When doing p2p with a NIC device, the NIC needs to make sure all the writes to the HBM (through the PCI bar of the Gaudi device) were flushed. It can be done by either the NIC or the host reading through the PCI bar. To support the host side, we supply a simple uapi to perform this flush through the driver, because the user can't create such a transaction by itself (the PCI bar isn't exposed to normal users). 
Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/drm/habanalabs_accel.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/habanalabs_accel.h b/include/uapi/drm/habanalabs_accel.h index 90e628779264..331567ec9e79 100644 --- a/include/uapi/drm/habanalabs_accel.h +++ b/include/uapi/drm/habanalabs_accel.h @@ -1478,6 +1478,14 @@ struct hl_cs_chunk { */ #define HL_CS_FLAGS_ENGINE_CORE_COMMAND 0x4000 +/* + * The flush HBW PCI writes is merged into the existing CS ioctls. + * Used to flush all HBW PCI writes. + * This is a blocking operation and for this reason the user shall not use + * the return sequence number (which will be invalid anyway) + */ +#define HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES 0x8000 + #define HL_CS_STATUS_SUCCESS 0 #define HL_MAX_JOBS_PER_CS 512 -- cgit v1.2.3 From b299221faf9b62166413526be2438d21257f019e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 29 Jan 2023 23:00:59 -0500 Subject: drm/amdgpu: add more fields into device info, caches sizes, etc. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD: important for conformance on gfx11 Other fields are exposed from IP discovery. enabled_rb_pipes_mask_hi is added for future chips, currently 0. 
Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21403 Signed-off-by: Marek Olšák Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 973af6d06626..b6eb90df5d05 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -715,6 +715,7 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_IDS_FLAGS_FUSION 0x1 #define AMDGPU_IDS_FLAGS_PREEMPTION 0x2 #define AMDGPU_IDS_FLAGS_TMZ 0x4 +#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8 /* indicate if acceleration can be working */ #define AMDGPU_INFO_ACCEL_WORKING 0x00 @@ -1115,6 +1116,16 @@ struct drm_amdgpu_info_device { __u64 tcc_disabled_mask; __u64 min_engine_clock; __u64 min_memory_clock; + /* The following fields are only set on gfx11+, older chips set 0. */ + __u32 tcp_cache_size; /* AKA GL0, VMEM cache */ + __u32 num_sqc_per_wgp; + __u32 sqc_data_cache_size; /* AKA SMEM cache */ + __u32 sqc_inst_cache_size; + __u32 gl1c_cache_size; + __u32 gl2c_cache_size; + __u64 mall_size; /* AKA infinity cache */ + /* high 32 bits of the rb pipes mask */ + __u32 enabled_rb_pipes_mask_hi; }; struct drm_amdgpu_info_hw_ip { -- cgit v1.2.3