author Dave Airlie <airlied@redhat.com> 2026-01-01 17:00:22 +1000
committer Dave Airlie <airlied@redhat.com> 2026-01-01 17:00:59 +1000
commit 59260fe5821ad108d0fda8a4a4fe0448e9821f27 (patch)
tree b6581b5aa5cae8b4fc1f21761211a87eaa5a6dc6 /drivers/gpu/drm
parent 9ec3c8ee16a07dff8be82aba595dd77c134c03c2 (diff)
parent 0b075f82935e82fc9fff90d06d2a161caaebd9c3 (diff)
Merge tag 'drm-xe-next-2025-12-30' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
Core Changes:
- Dynamic pagemaps and multi-device SVM (Thomas)

Driver Changes:
- Introduce SRIOV scheduler Groups (Daniele)
- Configure migration queue as low latency (Francois)
- Don't use absolute path in generated header comment (Calvin Owens)
- Add SoC remapper support for system controller (Umesh)
- Insert compiler barriers in GuC code (Jonathan)
- Rebar updates (Lucas)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/aVOiULyYdnFbq-JB@fedora
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--  drivers/gpu/drm/Makefile  3
-rw-r--r--  drivers/gpu/drm/drm_gpusvm.c  124
-rw-r--r--  drivers/gpu/drm/drm_pagemap.c  565
-rw-r--r--  drivers/gpu/drm/drm_pagemap_util.c  568
-rw-r--r--  drivers/gpu/drm/xe/Makefile  2
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_klvs_abi.h  64
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_scheduler_abi.h  57
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_pmt.h  3
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_soc_remapper_regs.h  14
-rw-r--r--  drivers/gpu/drm/xe/xe_device.c  25
-rw-r--r--  drivers/gpu/drm/xe/xe_device.h  2
-rw-r--r--  drivers/gpu/drm/xe/xe_device_types.h  22
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue.c  19
-rw-r--r--  drivers/gpu/drm/xe/xe_gen_wa_oob.c  3
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.h  12
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_ccs_mode.c  8
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_ccs_mode.h  2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf.c  20
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf.h  8
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c  279
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h  10
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h  5
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c  295
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c  329
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h  10
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h  39
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_vf.c  67
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_vf.h  1
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h  2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc.c  2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_capture.h  2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.c  21
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.h  6
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_fwif.h  54
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_klv_helpers.c  9
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.c  40
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.h  2
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.c  32
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.h  6
-rw-r--r--  drivers/gpu/drm/xe/xe_pci.c  8
-rw-r--r--  drivers/gpu/drm/xe/xe_pci_rebar.c  106
-rw-r--r--  drivers/gpu/drm/xe/xe_pci_rebar.h  13
-rw-r--r--  drivers/gpu/drm/xe/xe_pci_types.h  2
-rw-r--r--  drivers/gpu/drm/xe/xe_soc_remapper.c  52
-rw-r--r--  drivers/gpu/drm/xe/xe_soc_remapper.h  13
-rw-r--r--  drivers/gpu/drm/xe/xe_svm.c  721
-rw-r--r--  drivers/gpu/drm/xe/xe_svm.h  85
-rw-r--r--  drivers/gpu/drm/xe/xe_tile.c  34
-rw-r--r--  drivers/gpu/drm/xe/xe_tile.h  21
-rw-r--r--  drivers/gpu/drm/xe/xe_userptr.c  2
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.c  65
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.h  1
-rw-r--r--  drivers/gpu/drm/xe/xe_vm_madvise.c  106
-rw-r--r--  drivers/gpu/drm/xe/xe_vm_types.h  21
-rw-r--r--  drivers/gpu/drm/xe/xe_vram.c  91
-rw-r--r--  drivers/gpu/drm/xe/xe_vram.h  1
-rw-r--r--  drivers/gpu/drm/xe/xe_vram_types.h  15
-rw-r--r--  drivers/gpu/drm/xe/xe_vsec.c  6
58 files changed, 3622 insertions, 473 deletions
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 0e1c668b46d2..0deee72ef935 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -109,7 +109,8 @@ obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o
drm_gpusvm_helper-y := \
drm_gpusvm.o\
- drm_pagemap.o
+ drm_pagemap.o\
+ drm_pagemap_util.o
obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o
obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c
index 39c8c50401dd..aa9a0b60e727 100644
--- a/drivers/gpu/drm/drm_gpusvm.c
+++ b/drivers/gpu/drm/drm_gpusvm.c
@@ -744,6 +744,127 @@ err_free:
}
/**
+ * drm_gpusvm_scan_mm() - Check the migration state of a drm_gpusvm_range
+ * @range: Pointer to the struct drm_gpusvm_range to check.
+ * @dev_private_owner: The device-private page owner used to determine
+ * compatible device-private pages.
+ * @pagemap: The struct dev_pagemap pointer to use for pagemap-specific
+ * checks.
+ *
+ * Scan the CPU address space corresponding to @range and return the
+ * current migration state. Note that the result may be invalid as
+ * soon as the function returns. It's an advisory check.
+ *
+ * TODO: Bail early and call hmm_range_fault() for subranges.
+ *
+ * Return: See &enum drm_gpusvm_scan_result.
+ */
+enum drm_gpusvm_scan_result drm_gpusvm_scan_mm(struct drm_gpusvm_range *range,
+ void *dev_private_owner,
+ const struct dev_pagemap *pagemap)
+{
+ struct mmu_interval_notifier *notifier = &range->notifier->notifier;
+ unsigned long start = drm_gpusvm_range_start(range);
+ unsigned long end = drm_gpusvm_range_end(range);
+ struct hmm_range hmm_range = {
+ .default_flags = 0,
+ .notifier = notifier,
+ .start = start,
+ .end = end,
+ .dev_private_owner = dev_private_owner,
+ };
+ unsigned long timeout =
+ jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+ enum drm_gpusvm_scan_result state = DRM_GPUSVM_SCAN_UNPOPULATED, new_state;
+ unsigned long *pfns;
+ unsigned long npages = npages_in_range(start, end);
+ const struct dev_pagemap *other = NULL;
+ int err, i;
+
+ pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+ if (!pfns)
+ return DRM_GPUSVM_SCAN_UNPOPULATED;
+
+ hmm_range.hmm_pfns = pfns;
+
+retry:
+ hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
+ mmap_read_lock(range->gpusvm->mm);
+
+ while (true) {
+ err = hmm_range_fault(&hmm_range);
+ if (err == -EBUSY) {
+ if (time_after(jiffies, timeout))
+ break;
+
+ hmm_range.notifier_seq =
+ mmu_interval_read_begin(notifier);
+ continue;
+ }
+ break;
+ }
+ mmap_read_unlock(range->gpusvm->mm);
+	if (err)
+		goto out_free;
+
+ drm_gpusvm_notifier_lock(range->gpusvm);
+ if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) {
+ drm_gpusvm_notifier_unlock(range->gpusvm);
+ goto retry;
+ }
+
+ for (i = 0; i < npages;) {
+ struct page *page;
+ const struct dev_pagemap *cur = NULL;
+
+ if (!(pfns[i] & HMM_PFN_VALID)) {
+ state = DRM_GPUSVM_SCAN_UNPOPULATED;
+ goto err_free;
+ }
+
+ page = hmm_pfn_to_page(pfns[i]);
+ if (is_device_private_page(page) ||
+ is_device_coherent_page(page))
+ cur = page_pgmap(page);
+
+ if (cur == pagemap) {
+ new_state = DRM_GPUSVM_SCAN_EQUAL;
+ } else if (cur && (cur == other || !other)) {
+ new_state = DRM_GPUSVM_SCAN_OTHER;
+ other = cur;
+ } else if (cur) {
+ new_state = DRM_GPUSVM_SCAN_MIXED_DEVICE;
+ } else {
+ new_state = DRM_GPUSVM_SCAN_SYSTEM;
+ }
+
+ /*
+ * TODO: Could use an array for state
+ * transitions, and caller might want it
+ * to bail early for some results.
+ */
+ if (state == DRM_GPUSVM_SCAN_UNPOPULATED) {
+ state = new_state;
+ } else if (state != new_state) {
+ if (new_state == DRM_GPUSVM_SCAN_SYSTEM ||
+ state == DRM_GPUSVM_SCAN_SYSTEM)
+ state = DRM_GPUSVM_SCAN_MIXED;
+ else if (state != DRM_GPUSVM_SCAN_MIXED)
+ state = DRM_GPUSVM_SCAN_MIXED_DEVICE;
+ }
+
+ i += 1ul << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages);
+ }
+
+err_free:
+	drm_gpusvm_notifier_unlock(range->gpusvm);
+out_free:
+	kvfree(pfns);
+ return state;
+}
+EXPORT_SYMBOL(drm_gpusvm_scan_mm);
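
A minimal usage sketch; the helper and its policy are hypothetical, while drm_gpusvm_scan_mm() and the scan-result enum come from this series:

	/* Hypothetical caller: decide whether migrating @range to @pagemap is worthwhile. */
	static bool driver_wants_migration(struct drm_gpusvm_range *range,
					   void *owner,
					   const struct dev_pagemap *pagemap)
	{
		enum drm_gpusvm_scan_result res =
			drm_gpusvm_scan_mm(range, owner, pagemap);

		/* Advisory only: the result may be stale as soon as the call returns. */
		return res != DRM_GPUSVM_SCAN_EQUAL;
	}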
+
+/**
* drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range
* @gpusvm: Pointer to the GPU SVM structure
* @notifier: Pointer to the GPU SVM notifier structure
@@ -1038,6 +1159,7 @@ static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
flags.has_dma_mapping = false;
WRITE_ONCE(svm_pages->flags.__flags, flags.__flags);
+ drm_pagemap_put(svm_pages->dpagemap);
svm_pages->dpagemap = NULL;
}
}
@@ -1434,6 +1556,8 @@ map_pages:
if (pagemap) {
flags.has_devmem_pages = true;
+ drm_pagemap_get(dpagemap);
+ drm_pagemap_put(svm_pages->dpagemap);
svm_pages->dpagemap = dpagemap;
}
diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
index 37d7cfbbb3e8..ba099aa7c52f 100644
--- a/drivers/gpu/drm/drm_pagemap.c
+++ b/drivers/gpu/drm/drm_pagemap.c
@@ -3,11 +3,14 @@
* Copyright © 2024-2025 Intel Corporation
*/
+#include <linux/dma-fence.h>
#include <linux/dma-mapping.h>
#include <linux/migrate.h>
#include <linux/pagemap.h>
#include <drm/drm_drv.h>
#include <drm/drm_pagemap.h>
+#include <drm/drm_pagemap_util.h>
+#include <drm/drm_print.h>
/**
* DOC: Overview
@@ -62,7 +65,7 @@
*
* @refcount: Reference count for the zdd
* @devmem_allocation: device memory allocation
- * @device_private_page_owner: Device private pages owner
+ * @dpagemap: Refcounted pointer to the underlying struct drm_pagemap.
*
* This structure serves as a generic wrapper installed in
* page->zone_device_data. It provides infrastructure for looking up a device
@@ -74,12 +77,12 @@
struct drm_pagemap_zdd {
struct kref refcount;
struct drm_pagemap_devmem *devmem_allocation;
- void *device_private_page_owner;
+ struct drm_pagemap *dpagemap;
};
/**
* drm_pagemap_zdd_alloc() - Allocate a zdd structure.
- * @device_private_page_owner: Device private pages owner
+ * @dpagemap: Pointer to the underlying struct drm_pagemap.
*
* This function allocates and initializes a new zdd structure. It sets up the
* reference count and initializes the destroy work.
@@ -87,7 +90,7 @@ struct drm_pagemap_zdd {
* Return: Pointer to the allocated zdd on success, ERR_PTR() on failure.
*/
static struct drm_pagemap_zdd *
-drm_pagemap_zdd_alloc(void *device_private_page_owner)
+drm_pagemap_zdd_alloc(struct drm_pagemap *dpagemap)
{
struct drm_pagemap_zdd *zdd;
@@ -97,7 +100,7 @@ drm_pagemap_zdd_alloc(void *device_private_page_owner)
kref_init(&zdd->refcount);
zdd->devmem_allocation = NULL;
- zdd->device_private_page_owner = device_private_page_owner;
+ zdd->dpagemap = drm_pagemap_get(dpagemap);
return zdd;
}
@@ -127,6 +130,7 @@ static void drm_pagemap_zdd_destroy(struct kref *ref)
struct drm_pagemap_zdd *zdd =
container_of(ref, struct drm_pagemap_zdd, refcount);
struct drm_pagemap_devmem *devmem = zdd->devmem_allocation;
+ struct drm_pagemap *dpagemap = zdd->dpagemap;
if (devmem) {
complete_all(&devmem->detached);
@@ -134,6 +138,7 @@ static void drm_pagemap_zdd_destroy(struct kref *ref)
devmem->ops->devmem_release(devmem);
}
kfree(zdd);
+ drm_pagemap_put(dpagemap);
}
/**
@@ -201,11 +206,13 @@ static void drm_pagemap_get_devmem_page(struct page *page,
/**
* drm_pagemap_migrate_map_pages() - Map migration pages for GPU SVM migration
- * @dev: The device for which the pages are being mapped
- * @pagemap_addr: Array to store DMA information corresponding to mapped pages
- * @migrate_pfn: Array of migrate page frame numbers to map
- * @npages: Number of pages to map
+ * @dev: The device performing the migration.
+ * @local_dpagemap: The drm_pagemap local to the migrating device.
+ * @pagemap_addr: Array to store DMA information corresponding to mapped pages.
+ * @migrate_pfn: Array of page frame numbers of system pages or peer pages to map.
+ * @npages: Number of system pages or peer pages to map.
* @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
+ * @mdetails: Details governing the migration behaviour.
*
* This function maps pages of memory for migration usage in GPU SVM. It
* iterates over each page frame number provided in @migrate_pfn, maps the
@@ -215,12 +222,14 @@ static void drm_pagemap_get_devmem_page(struct page *page,
* Returns: 0 on success, -EFAULT if an error occurs during mapping.
*/
static int drm_pagemap_migrate_map_pages(struct device *dev,
+ struct drm_pagemap *local_dpagemap,
struct drm_pagemap_addr *pagemap_addr,
unsigned long *migrate_pfn,
unsigned long npages,
- enum dma_data_direction dir)
+ enum dma_data_direction dir,
+ const struct drm_pagemap_migrate_details *mdetails)
{
- unsigned long i;
+ unsigned long num_peer_pages = 0, num_local_pages = 0, i;
for (i = 0; i < npages;) {
struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
@@ -231,31 +240,58 @@ static int drm_pagemap_migrate_map_pages(struct device *dev,
if (!page)
goto next;
- if (WARN_ON_ONCE(is_zone_device_page(page)))
- return -EFAULT;
-
folio = page_folio(page);
order = folio_order(folio);
- dma_addr = dma_map_page(dev, page, 0, page_size(page), dir);
- if (dma_mapping_error(dev, dma_addr))
- return -EFAULT;
-
- pagemap_addr[i] =
- drm_pagemap_addr_encode(dma_addr,
- DRM_INTERCONNECT_SYSTEM,
- order, dir);
+ if (is_device_private_page(page)) {
+ struct drm_pagemap_zdd *zdd = page->zone_device_data;
+ struct drm_pagemap *dpagemap = zdd->dpagemap;
+ struct drm_pagemap_addr addr;
+
+ if (dpagemap == local_dpagemap) {
+ if (!mdetails->can_migrate_same_pagemap)
+ goto next;
+
+ num_local_pages += NR_PAGES(order);
+ } else {
+ num_peer_pages += NR_PAGES(order);
+ }
+
+ addr = dpagemap->ops->device_map(dpagemap, dev, page, order, dir);
+ if (dma_mapping_error(dev, addr.addr))
+ return -EFAULT;
+
+ pagemap_addr[i] = addr;
+ } else {
+ dma_addr = dma_map_page(dev, page, 0, page_size(page), dir);
+ if (dma_mapping_error(dev, dma_addr))
+ return -EFAULT;
+
+ pagemap_addr[i] =
+ drm_pagemap_addr_encode(dma_addr,
+ DRM_INTERCONNECT_SYSTEM,
+ order, dir);
+ }
next:
i += NR_PAGES(order);
}
+ if (num_peer_pages)
+ drm_dbg(local_dpagemap->drm, "Migrating %lu peer pages over interconnect.\n",
+ num_peer_pages);
+ if (num_local_pages)
+		drm_dbg(local_dpagemap->drm, "Migrating %lu pages within the local pagemap.\n",
+ num_local_pages);
+
return 0;
}
/**
* drm_pagemap_migrate_unmap_pages() - Unmap pages previously mapped for GPU SVM migration
* @dev: The device for which the pages were mapped
+ * @migrate_pfn: Array of migrate pfns set up for the mapped pages. Used to
+ * determine the drm_pagemap of a peer device private page.
* @pagemap_addr: Array of DMA information corresponding to mapped pages
* @npages: Number of pages to unmap
* @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
@@ -266,16 +302,27 @@ next:
*/
static void drm_pagemap_migrate_unmap_pages(struct device *dev,
struct drm_pagemap_addr *pagemap_addr,
+ unsigned long *migrate_pfn,
unsigned long npages,
enum dma_data_direction dir)
{
unsigned long i;
for (i = 0; i < npages;) {
- if (!pagemap_addr[i].addr || dma_mapping_error(dev, pagemap_addr[i].addr))
+ struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
+
+ if (!page || !pagemap_addr[i].addr || dma_mapping_error(dev, pagemap_addr[i].addr))
goto next;
- dma_unmap_page(dev, pagemap_addr[i].addr, PAGE_SIZE << pagemap_addr[i].order, dir);
+ if (is_zone_device_page(page)) {
+ struct drm_pagemap_zdd *zdd = page->zone_device_data;
+ struct drm_pagemap *dpagemap = zdd->dpagemap;
+
+ dpagemap->ops->device_unmap(dpagemap, dev, pagemap_addr[i]);
+ } else {
+ dma_unmap_page(dev, pagemap_addr[i].addr,
+ PAGE_SIZE << pagemap_addr[i].order, dir);
+ }
next:
i += NR_PAGES(pagemap_addr[i].order);
@@ -288,18 +335,125 @@ npages_in_range(unsigned long start, unsigned long end)
return (end - start) >> PAGE_SHIFT;
}
+static int
+drm_pagemap_migrate_remote_to_local(struct drm_pagemap_devmem *devmem,
+ struct device *remote_device,
+ struct drm_pagemap *remote_dpagemap,
+ unsigned long local_pfns[],
+ struct page *remote_pages[],
+ struct drm_pagemap_addr pagemap_addr[],
+ unsigned long npages,
+ const struct drm_pagemap_devmem_ops *ops,
+ const struct drm_pagemap_migrate_details *mdetails)
+
+{
+ int err = drm_pagemap_migrate_map_pages(remote_device, remote_dpagemap,
+ pagemap_addr, local_pfns,
+ npages, DMA_FROM_DEVICE, mdetails);
+
+ if (err)
+ goto out;
+
+ err = ops->copy_to_ram(remote_pages, pagemap_addr, npages,
+ devmem->pre_migrate_fence);
+out:
+ drm_pagemap_migrate_unmap_pages(remote_device, pagemap_addr, local_pfns,
+ npages, DMA_FROM_DEVICE);
+ return err;
+}
+
+static int
+drm_pagemap_migrate_sys_to_dev(struct drm_pagemap_devmem *devmem,
+ unsigned long sys_pfns[],
+ struct page *local_pages[],
+ struct drm_pagemap_addr pagemap_addr[],
+ unsigned long npages,
+ const struct drm_pagemap_devmem_ops *ops,
+ const struct drm_pagemap_migrate_details *mdetails)
+{
+ int err = drm_pagemap_migrate_map_pages(devmem->dev, devmem->dpagemap,
+ pagemap_addr, sys_pfns, npages,
+ DMA_TO_DEVICE, mdetails);
+
+ if (err)
+ goto out;
+
+ err = ops->copy_to_devmem(local_pages, pagemap_addr, npages,
+ devmem->pre_migrate_fence);
+out:
+ drm_pagemap_migrate_unmap_pages(devmem->dev, pagemap_addr, sys_pfns, npages,
+ DMA_TO_DEVICE);
+ return err;
+}
+
+/**
+ * struct migrate_range_loc - Cursor into the loop over migrate_pfns for migrating to
+ * device.
+ * @start: The current loop index.
+ * @device: migrating device.
+ * @dpagemap: Pointer to struct drm_pagemap used by the migrating device.
+ * @ops: The copy ops to be used for the migrating device.
+ */
+struct migrate_range_loc {
+ unsigned long start;
+ struct device *device;
+ struct drm_pagemap *dpagemap;
+ const struct drm_pagemap_devmem_ops *ops;
+};
+
+static int drm_pagemap_migrate_range(struct drm_pagemap_devmem *devmem,
+ unsigned long src_pfns[],
+ unsigned long dst_pfns[],
+ struct page *pages[],
+ struct drm_pagemap_addr pagemap_addr[],
+ struct migrate_range_loc *last,
+ const struct migrate_range_loc *cur,
+ const struct drm_pagemap_migrate_details *mdetails)
+{
+ int ret = 0;
+
+ if (cur->start == 0)
+ goto out;
+
+ if (cur->start <= last->start)
+ return 0;
+
+ if (cur->dpagemap == last->dpagemap && cur->ops == last->ops)
+ return 0;
+
+ if (last->dpagemap)
+ ret = drm_pagemap_migrate_remote_to_local(devmem,
+ last->device,
+ last->dpagemap,
+ &dst_pfns[last->start],
+ &pages[last->start],
+ &pagemap_addr[last->start],
+ cur->start - last->start,
+ last->ops, mdetails);
+
+ else
+ ret = drm_pagemap_migrate_sys_to_dev(devmem,
+ &src_pfns[last->start],
+ &pages[last->start],
+ &pagemap_addr[last->start],
+ cur->start - last->start,
+ last->ops, mdetails);
+
+out:
+ *last = *cur;
+ return ret;
+}
+
/**
* drm_pagemap_migrate_to_devmem() - Migrate a struct mm_struct range to device memory
* @devmem_allocation: The device memory allocation to migrate to.
* The caller should hold a reference to the device memory allocation,
- * and the reference is consumed by this function unless it returns with
+ * and the reference is consumed by this function even if it returns with
* an error.
* @mm: Pointer to the struct mm_struct.
* @start: Start of the virtual address range to migrate.
* @end: End of the virtual address range to migrate.
- * @timeslice_ms: The time requested for the migrated pagemap pages to
- * be present in @mm before being allowed to be migrated back.
- * @pgmap_owner: Not used currently, since only system memory is considered.
+ * @mdetails: Details to govern the migration.
*
* This function migrates the specified virtual address range to device memory.
* It performs the necessary setup and invokes the driver-specific operations for
@@ -317,17 +471,21 @@ npages_in_range(unsigned long start, unsigned long end)
int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
struct mm_struct *mm,
unsigned long start, unsigned long end,
- unsigned long timeslice_ms,
- void *pgmap_owner)
+ const struct drm_pagemap_migrate_details *mdetails)
{
const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops;
+ struct drm_pagemap *dpagemap = devmem_allocation->dpagemap;
+ struct dev_pagemap *pagemap = dpagemap->pagemap;
struct migrate_vma migrate = {
.start = start,
.end = end,
- .pgmap_owner = pgmap_owner,
- .flags = MIGRATE_VMA_SELECT_SYSTEM,
+ .pgmap_owner = pagemap->owner,
+ .flags = MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT |
+ MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
};
unsigned long i, npages = npages_in_range(start, end);
+ unsigned long own_pages = 0, migrated_pages = 0;
+ struct migrate_range_loc cur, last = {.device = dpagemap->drm->dev, .ops = ops};
struct vm_area_struct *vas;
struct drm_pagemap_zdd *zdd = NULL;
struct page **pages;
@@ -366,11 +524,13 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
pagemap_addr = buf + (2 * sizeof(*migrate.src) * npages);
pages = buf + (2 * sizeof(*migrate.src) + sizeof(*pagemap_addr)) * npages;
- zdd = drm_pagemap_zdd_alloc(pgmap_owner);
+ zdd = drm_pagemap_zdd_alloc(dpagemap);
if (!zdd) {
err = -ENOMEM;
- goto err_free;
+ kvfree(buf);
+ goto err_out;
}
+ zdd->devmem_allocation = devmem_allocation; /* Owns ref */
migrate.vma = vas;
migrate.src = buf;
@@ -381,54 +541,134 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
goto err_free;
if (!migrate.cpages) {
- err = -EFAULT;
+ /* No pages to migrate. Raced or unknown device pages. */
+ err = -EBUSY;
goto err_free;
}
if (migrate.cpages != npages) {
+ /*
+ * Some pages to migrate. But we want to migrate all or
+ * nothing. Raced or unknown device pages.
+ */
err = -EBUSY;
- goto err_finalize;
+ goto err_aborted_migration;
}
- err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst);
- if (err)
- goto err_finalize;
+ /* Count device-private pages to migrate */
+ for (i = 0; i < npages;) {
+ struct page *src_page = migrate_pfn_to_page(migrate.src[i]);
+ unsigned long nr_pages = src_page ? NR_PAGES(folio_order(page_folio(src_page))) : 1;
- err = drm_pagemap_migrate_map_pages(devmem_allocation->dev, pagemap_addr,
- migrate.src, npages, DMA_TO_DEVICE);
+ if (src_page && is_zone_device_page(src_page)) {
+ if (page_pgmap(src_page) == pagemap)
+ own_pages += nr_pages;
+ }
+
+ i += nr_pages;
+ }
+ drm_dbg(dpagemap->drm, "Total pages %lu; Own pages: %lu.\n",
+ npages, own_pages);
+ if (own_pages == npages) {
+ err = 0;
+ drm_dbg(dpagemap->drm, "Migration wasn't necessary.\n");
+ goto err_aborted_migration;
+ } else if (own_pages && !mdetails->can_migrate_same_pagemap) {
+ err = -EBUSY;
+ drm_dbg(dpagemap->drm, "Migration aborted due to fragmentation.\n");
+ goto err_aborted_migration;
+ }
+
+ err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst);
if (err)
goto err_finalize;
+ own_pages = 0;
+
for (i = 0; i < npages; ++i) {
struct page *page = pfn_to_page(migrate.dst[i]);
+ struct page *src_page = migrate_pfn_to_page(migrate.src[i]);
+ cur.start = i;
+
+ pages[i] = NULL;
+ if (src_page && is_device_private_page(src_page)) {
+ struct drm_pagemap_zdd *src_zdd = src_page->zone_device_data;
- pages[i] = page;
+ if (page_pgmap(src_page) == pagemap &&
+ !mdetails->can_migrate_same_pagemap) {
+ migrate.dst[i] = 0;
+ own_pages++;
+ continue;
+ }
+ if (mdetails->source_peer_migrates) {
+ cur.dpagemap = src_zdd->dpagemap;
+ cur.ops = src_zdd->devmem_allocation->ops;
+ cur.device = cur.dpagemap->drm->dev;
+ pages[i] = src_page;
+ }
+ }
+ if (!pages[i]) {
+ cur.dpagemap = NULL;
+ cur.ops = ops;
+ cur.device = dpagemap->drm->dev;
+ pages[i] = page;
+ }
migrate.dst[i] = migrate_pfn(migrate.dst[i]);
drm_pagemap_get_devmem_page(page, zdd);
- }
- err = ops->copy_to_devmem(pages, pagemap_addr, npages);
+ /* If we switched the migrating drm_pagemap, migrate previous pages now */
+ err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst,
+ pages, pagemap_addr, &last, &cur,
+ mdetails);
+ if (err)
+ goto err_finalize;
+ }
+ cur.start = npages;
+ cur.ops = NULL; /* Force migration */
+ err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst,
+ pages, pagemap_addr, &last, &cur, mdetails);
if (err)
goto err_finalize;
+ drm_WARN_ON(dpagemap->drm, !!own_pages);
+
+ dma_fence_put(devmem_allocation->pre_migrate_fence);
+ devmem_allocation->pre_migrate_fence = NULL;
+
/* Upon success bind devmem allocation to range and zdd */
devmem_allocation->timeslice_expiration = get_jiffies_64() +
- msecs_to_jiffies(timeslice_ms);
- zdd->devmem_allocation = devmem_allocation; /* Owns ref */
+ msecs_to_jiffies(mdetails->timeslice_ms);
err_finalize:
if (err)
drm_pagemap_migration_unlock_put_pages(npages, migrate.dst);
+err_aborted_migration:
migrate_vma_pages(&migrate);
+
+ for (i = 0; i < npages;) {
+ struct page *page = migrate_pfn_to_page(migrate.src[i]);
+ unsigned long nr_pages = page ? NR_PAGES(folio_order(page_folio(page))) : 1;
+
+ if (migrate.src[i] & MIGRATE_PFN_MIGRATE)
+ migrated_pages += nr_pages;
+
+ i += nr_pages;
+ }
+
+ if (!err && migrated_pages < npages - own_pages) {
+ drm_dbg(dpagemap->drm, "Raced while finalizing migration.\n");
+ err = -EBUSY;
+ }
+
migrate_vma_finalize(&migrate);
- drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, pagemap_addr, npages,
- DMA_TO_DEVICE);
err_free:
- if (zdd)
- drm_pagemap_zdd_put(zdd);
+ drm_pagemap_zdd_put(zdd);
kvfree(buf);
+ return err;
+
err_out:
+ devmem_allocation->ops->devmem_release(devmem_allocation);
return err;
}
EXPORT_SYMBOL_GPL(drm_pagemap_migrate_to_devmem);
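
A hedged sketch of a call site under the new signature; the field values and surrounding variables are hypothetical, while the struct and field names (timeslice_ms, can_migrate_same_pagemap, source_peer_migrates) are the ones used in this series:

	struct drm_pagemap_migrate_details mdetails = {
		.timeslice_ms = 5,			/* hypothetical value */
		.can_migrate_same_pagemap = false,	/* abort on same-pagemap fragmentation */
		.source_peer_migrates = true,		/* source peer's ops drive the copy */
	};
	int err;

	/* Note: the devmem_allocation reference is now consumed even on error. */
	err = drm_pagemap_migrate_to_devmem(devmem, mm, start, end, &mdetails);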
@@ -538,6 +778,157 @@ next_put:
return -ENOMEM;
}
+static void drm_pagemap_dev_unhold_work(struct work_struct *work);
+static LLIST_HEAD(drm_pagemap_unhold_list);
+static DECLARE_WORK(drm_pagemap_work, drm_pagemap_dev_unhold_work);
+
+/**
+ * struct drm_pagemap_dev_hold - Struct to aid in drm_device release.
+ * @link: Link into drm_pagemap_unhold_list for deferred reference releases.
+ * @drm: drm device to put.
+ *
+ * When a struct drm_pagemap is released, we also need to release the
+ * reference it holds on the drm device. However, that typically needs
+ * to be deferred to a system-wide workqueue.
+ * Each time a struct drm_pagemap is initialized
+ * (or re-initialized if cached), we therefore allocate a separate
+ * drm_pagemap_dev_hold item, through which we put the drm device and
+ * the associated module.
+ */
+struct drm_pagemap_dev_hold {
+ struct llist_node link;
+ struct drm_device *drm;
+};
+
+static void drm_pagemap_release(struct kref *ref)
+{
+ struct drm_pagemap *dpagemap = container_of(ref, typeof(*dpagemap), ref);
+ struct drm_pagemap_dev_hold *dev_hold = dpagemap->dev_hold;
+
+ /*
+ * We know the pagemap provider is alive at this point, since
+ * the struct drm_pagemap_dev_hold holds a reference to the
+ * pagemap provider drm_device and its module.
+ */
+ dpagemap->dev_hold = NULL;
+ drm_pagemap_shrinker_add(dpagemap);
+ llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
+ schedule_work(&drm_pagemap_work);
+ /*
+ * Here, either the provider device is still alive, since if called from
+ * page_free(), the caller is holding a reference on the dev_pagemap,
+ * or if called from drm_pagemap_put(), the direct caller is still alive.
+ * This ensures we can't race with THIS module unload.
+ */
+}
+
+static void drm_pagemap_dev_unhold_work(struct work_struct *work)
+{
+ struct llist_node *node = llist_del_all(&drm_pagemap_unhold_list);
+ struct drm_pagemap_dev_hold *dev_hold, *next;
+
+ /*
+ * Deferred release of drm_pagemap provider device and module.
+ * THIS module is kept alive during the release by the
+ * flush_work() in the drm_pagemap_exit() function.
+ */
+ llist_for_each_entry_safe(dev_hold, next, node, link) {
+ struct drm_device *drm = dev_hold->drm;
+ struct module *module = drm->driver->fops->owner;
+
+ drm_dbg(drm, "Releasing reference on provider device and module.\n");
+ drm_dev_put(drm);
+ module_put(module);
+ kfree(dev_hold);
+ }
+}
+
+static struct drm_pagemap_dev_hold *
+drm_pagemap_dev_hold(struct drm_pagemap *dpagemap)
+{
+ struct drm_pagemap_dev_hold *dev_hold;
+ struct drm_device *drm = dpagemap->drm;
+
+ dev_hold = kzalloc(sizeof(*dev_hold), GFP_KERNEL);
+ if (!dev_hold)
+ return ERR_PTR(-ENOMEM);
+
+ init_llist_node(&dev_hold->link);
+ dev_hold->drm = drm;
+ (void)try_module_get(drm->driver->fops->owner);
+ drm_dev_get(drm);
+
+ return dev_hold;
+}
+
+/**
+ * drm_pagemap_reinit() - Reinitialize a drm_pagemap
+ * @dpagemap: The drm_pagemap to reinitialize
+ *
+ * Reinitialize a drm_pagemap, for which drm_pagemap_release
+ * has already been called. This interface is intended for the
+ * situation where the driver caches a destroyed drm_pagemap.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int drm_pagemap_reinit(struct drm_pagemap *dpagemap)
+{
+ dpagemap->dev_hold = drm_pagemap_dev_hold(dpagemap);
+ if (IS_ERR(dpagemap->dev_hold))
+ return PTR_ERR(dpagemap->dev_hold);
+
+ kref_init(&dpagemap->ref);
+ return 0;
+}
+EXPORT_SYMBOL(drm_pagemap_reinit);
+
+/**
+ * drm_pagemap_init() - Initialize a pre-allocated drm_pagemap
+ * @dpagemap: The drm_pagemap to initialize.
+ * @pagemap: The associated dev_pagemap providing the device
+ * private pages.
+ * @drm: The drm device. The drm_pagemap holds a reference on the
+ * drm_device and the module owning the drm_device until
+ * drm_pagemap_release(). This facilitates drm_pagemap exporting.
+ * @ops: The drm_pagemap ops.
+ *
+ * Initialize and take an initial reference on a drm_pagemap.
+ * After successful return, use drm_pagemap_put() to destroy.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int drm_pagemap_init(struct drm_pagemap *dpagemap,
+ struct dev_pagemap *pagemap,
+ struct drm_device *drm,
+ const struct drm_pagemap_ops *ops)
+{
+ kref_init(&dpagemap->ref);
+ dpagemap->ops = ops;
+ dpagemap->pagemap = pagemap;
+ dpagemap->drm = drm;
+ dpagemap->cache = NULL;
+ INIT_LIST_HEAD(&dpagemap->shrink_link);
+
+ return drm_pagemap_reinit(dpagemap);
+}
+EXPORT_SYMBOL(drm_pagemap_init);
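
A minimal lifecycle sketch, assuming the driver embeds the struct drm_pagemap in its own allocation; struct my_vram_pagemap and my_pagemap_ops are hypothetical:

	struct my_vram_pagemap {
		struct drm_pagemap dpagemap;	/* hypothetical container */
		struct dev_pagemap pagemap;
	};

	struct my_vram_pagemap *vp = kzalloc(sizeof(*vp), GFP_KERNEL);
	int err;

	if (!vp)
		return -ENOMEM;

	err = drm_pagemap_init(&vp->dpagemap, &vp->pagemap, drm, &my_pagemap_ops);
	if (err) {
		kfree(vp);
		return err;
	}
	/* ... use the pagemap ... */
	drm_pagemap_put(&vp->dpagemap);	/* final put ends up in drm_pagemap_release() */

After drm_pagemap_init() succeeds, teardown goes through the release path (and, if provided, ops->destroy()) rather than a direct kfree() by the caller.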
+
+/**
+ * drm_pagemap_put() - Put a struct drm_pagemap reference
+ * @dpagemap: Pointer to a struct drm_pagemap object.
+ *
+ * Puts a struct drm_pagemap reference and frees the drm_pagemap object
+ * if the refcount reaches zero.
+ */
+void drm_pagemap_put(struct drm_pagemap *dpagemap)
+{
+ if (likely(dpagemap)) {
+ drm_pagemap_shrinker_might_lock(dpagemap);
+ kref_put(&dpagemap->ref, drm_pagemap_release);
+ }
+}
+EXPORT_SYMBOL(drm_pagemap_put);
+
/**
* drm_pagemap_evict_to_ram() - Evict GPU SVM range to RAM
* @devmem_allocation: Pointer to the device memory allocation
@@ -550,6 +941,7 @@ next_put:
int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation)
{
const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops;
+ struct drm_pagemap_migrate_details mdetails = {};
unsigned long npages, mpages = 0;
struct page **pages;
unsigned long *src, *dst;
@@ -588,15 +980,17 @@ retry:
if (err || !mpages)
goto err_finalize;
- err = drm_pagemap_migrate_map_pages(devmem_allocation->dev, pagemap_addr,
- dst, npages, DMA_FROM_DEVICE);
+ err = drm_pagemap_migrate_map_pages(devmem_allocation->dev,
+ devmem_allocation->dpagemap, pagemap_addr,
+ dst, npages, DMA_FROM_DEVICE,
+ &mdetails);
if (err)
goto err_finalize;
for (i = 0; i < npages; ++i)
pages[i] = migrate_pfn_to_page(src[i]);
- err = ops->copy_to_ram(pages, pagemap_addr, npages);
+ err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL);
if (err)
goto err_finalize;
@@ -605,8 +999,9 @@ err_finalize:
drm_pagemap_migration_unlock_put_pages(npages, dst);
migrate_device_pages(src, dst, npages);
migrate_device_finalize(src, dst, npages);
- drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, pagemap_addr, npages,
+ drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, pagemap_addr, dst, npages,
DMA_FROM_DEVICE);
+
err_free:
kvfree(buf);
err_out:
@@ -627,8 +1022,7 @@ EXPORT_SYMBOL_GPL(drm_pagemap_evict_to_ram);
/**
* __drm_pagemap_migrate_to_ram() - Migrate GPU SVM range to RAM (internal)
* @vas: Pointer to the VM area structure
- * @device_private_page_owner: Device private pages owner
- * @page: Pointer to the page for fault handling (can be NULL)
+ * @page: Pointer to the page for fault handling.
* @fault_addr: Fault address
* @size: Size of migration
*
@@ -639,18 +1033,18 @@ EXPORT_SYMBOL_GPL(drm_pagemap_evict_to_ram);
* Return: 0 on success, negative error code on failure.
*/
static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
- void *device_private_page_owner,
struct page *page,
unsigned long fault_addr,
unsigned long size)
{
struct migrate_vma migrate = {
.vma = vas,
- .pgmap_owner = device_private_page_owner,
+ .pgmap_owner = page_pgmap(page)->owner,
.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE |
MIGRATE_VMA_SELECT_DEVICE_COHERENT,
.fault_page = page,
};
+ struct drm_pagemap_migrate_details mdetails = {};
struct drm_pagemap_zdd *zdd;
const struct drm_pagemap_devmem_ops *ops;
struct device *dev = NULL;
@@ -661,12 +1055,9 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
void *buf;
int i, err = 0;
- if (page) {
- zdd = page->zone_device_data;
- if (time_before64(get_jiffies_64(),
- zdd->devmem_allocation->timeslice_expiration))
- return 0;
- }
+ zdd = page->zone_device_data;
+ if (time_before64(get_jiffies_64(), zdd->devmem_allocation->timeslice_expiration))
+ return 0;
start = ALIGN_DOWN(fault_addr, size);
end = ALIGN(fault_addr + 1, size);
@@ -702,19 +1093,6 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
if (!migrate.cpages)
goto err_free;
- if (!page) {
- for (i = 0; i < npages; ++i) {
- if (!(migrate.src[i] & MIGRATE_PFN_MIGRATE))
- continue;
-
- page = migrate_pfn_to_page(migrate.src[i]);
- break;
- }
-
- if (!page)
- goto err_finalize;
- }
- zdd = page->zone_device_data;
ops = zdd->devmem_allocation->ops;
dev = zdd->devmem_allocation->dev;
@@ -724,15 +1102,15 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
if (err)
goto err_finalize;
- err = drm_pagemap_migrate_map_pages(dev, pagemap_addr, migrate.dst, npages,
- DMA_FROM_DEVICE);
+ err = drm_pagemap_migrate_map_pages(dev, zdd->dpagemap, pagemap_addr, migrate.dst, npages,
+ DMA_FROM_DEVICE, &mdetails);
if (err)
goto err_finalize;
for (i = 0; i < npages; ++i)
pages[i] = migrate_pfn_to_page(migrate.src[i]);
- err = ops->copy_to_ram(pages, pagemap_addr, npages);
+ err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL);
if (err)
goto err_finalize;
@@ -742,8 +1120,8 @@ err_finalize:
migrate_vma_pages(&migrate);
migrate_vma_finalize(&migrate);
if (dev)
- drm_pagemap_migrate_unmap_pages(dev, pagemap_addr, npages,
- DMA_FROM_DEVICE);
+ drm_pagemap_migrate_unmap_pages(dev, pagemap_addr, migrate.dst,
+ npages, DMA_FROM_DEVICE);
err_free:
kvfree(buf);
err_out:
@@ -780,7 +1158,6 @@ static vm_fault_t drm_pagemap_migrate_to_ram(struct vm_fault *vmf)
int err;
err = __drm_pagemap_migrate_to_ram(vmf->vma,
- zdd->device_private_page_owner,
vmf->page, vmf->address,
zdd->devmem_allocation->size);
@@ -813,11 +1190,14 @@ EXPORT_SYMBOL_GPL(drm_pagemap_pagemap_ops_get);
* @ops: Pointer to the operations structure for GPU SVM device memory
* @dpagemap: The struct drm_pagemap we're allocating from.
* @size: Size of device memory allocation
+ * @pre_migrate_fence: Fence to wait for or pipeline behind before migration starts.
+ * (May be NULL).
*/
void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation,
struct device *dev, struct mm_struct *mm,
const struct drm_pagemap_devmem_ops *ops,
- struct drm_pagemap *dpagemap, size_t size)
+ struct drm_pagemap *dpagemap, size_t size,
+ struct dma_fence *pre_migrate_fence)
{
init_completion(&devmem_allocation->detached);
devmem_allocation->dev = dev;
@@ -825,6 +1205,7 @@ void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation,
devmem_allocation->ops = ops;
devmem_allocation->dpagemap = dpagemap;
devmem_allocation->size = size;
+ devmem_allocation->pre_migrate_fence = pre_migrate_fence;
}
EXPORT_SYMBOL_GPL(drm_pagemap_devmem_init);
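
A short sketch of the new @pre_migrate_fence parameter; where the fence comes from is hypothetical, the parameter itself is from this series:

	/* Hypothetical: fence for a pending clear of the destination memory (may be NULL). */
	struct dma_fence *fence = my_get_pending_clear_fence(devmem_bo);

	drm_pagemap_devmem_init(&devmem->base, dev, mm, &my_devmem_ops,
				dpagemap, size, fence);
	/* The copy_to_devmem()/copy_to_ram() ops now receive this fence to
	 * wait for or pipeline behind. */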
@@ -880,3 +1261,19 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
return err;
}
EXPORT_SYMBOL(drm_pagemap_populate_mm);
+
+void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim)
+{
+ if (dpagemap->ops->destroy)
+ dpagemap->ops->destroy(dpagemap, is_atomic_or_reclaim);
+ else
+ kfree(dpagemap);
+}
+
+static void drm_pagemap_exit(void)
+{
+ flush_work(&drm_pagemap_work);
+ if (WARN_ON(!llist_empty(&drm_pagemap_unhold_list)))
+ disable_work_sync(&drm_pagemap_work);
+}
+module_exit(drm_pagemap_exit);
diff --git a/drivers/gpu/drm/drm_pagemap_util.c b/drivers/gpu/drm/drm_pagemap_util.c
new file mode 100644
index 000000000000..c6ae3357c7fb
--- /dev/null
+++ b/drivers/gpu/drm/drm_pagemap_util.c
@@ -0,0 +1,568 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/slab.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_pagemap.h>
+#include <drm/drm_pagemap_util.h>
+#include <drm/drm_print.h>
+
+/**
+ * struct drm_pagemap_cache - Lookup structure for pagemaps
+ *
+ * Structure to keep track of active (refcount > 0) and inactive
+ * (refcount == 0) pagemaps. Inactive pagemaps can be made active
+ * again by waiting for the @queued completion (indicating that the
+ * pagemap has been put on the @shrinker's list of shrinkable
+ * pagemaps) and then successfully removing it from the @shrinker's
+ * list. The latter may fail if the shrinker is already in the
+ * process of freeing the pagemap. A struct drm_pagemap_cache can
+ * hold a single struct drm_pagemap.
+ */
+struct drm_pagemap_cache {
+ /** @lookup_mutex: Mutex making the lookup process atomic */
+ struct mutex lookup_mutex;
+ /** @lock: Lock protecting the @dpagemap pointer */
+ spinlock_t lock;
+ /** @shrinker: Pointer to the shrinker used for this cache. Immutable. */
+ struct drm_pagemap_shrinker *shrinker;
+ /** @dpagemap: Non-refcounted pointer to the drm_pagemap */
+ struct drm_pagemap *dpagemap;
+ /**
+ * @queued: Signals when an inactive drm_pagemap has been put on
+ * @shrinker's list.
+ */
+ struct completion queued;
+};
+
+/**
+ * struct drm_pagemap_shrinker - Shrinker to remove unused pagemaps
+ */
+struct drm_pagemap_shrinker {
+ /** @drm: Pointer to the drm device. */
+ struct drm_device *drm;
+ /** @lock: Spinlock to protect the @dpagemaps list. */
+ spinlock_t lock;
+ /** @dpagemaps: List of unused dpagemaps. */
+ struct list_head dpagemaps;
+ /** @num_dpagemaps: Number of unused dpagemaps in @dpagemaps. */
+ atomic_t num_dpagemaps;
+ /** @shrink: Pointer to the struct shrinker. */
+ struct shrinker *shrink;
+};
+
+static bool drm_pagemap_shrinker_cancel(struct drm_pagemap *dpagemap);
+
+static void drm_pagemap_cache_fini(void *arg)
+{
+ struct drm_pagemap_cache *cache = arg;
+ struct drm_pagemap *dpagemap;
+
+ drm_dbg(cache->shrinker->drm, "Destroying dpagemap cache.\n");
+ spin_lock(&cache->lock);
+ dpagemap = cache->dpagemap;
+ if (!dpagemap) {
+ spin_unlock(&cache->lock);
+ goto out;
+ }
+
+ if (drm_pagemap_shrinker_cancel(dpagemap)) {
+ cache->dpagemap = NULL;
+ spin_unlock(&cache->lock);
+ drm_pagemap_destroy(dpagemap, false);
+ }
+
+out:
+ mutex_destroy(&cache->lookup_mutex);
+ kfree(cache);
+}
+
+/**
+ * drm_pagemap_cache_create_devm() - Create a drm_pagemap_cache
+ * @shrinker: Pointer to a struct drm_pagemap_shrinker.
+ *
+ * Create a device-managed drm_pagemap cache. The cache is automatically
+ * destroyed on struct device removal, at which point any *inactive*
+ * drm_pagemaps are destroyed.
+ *
+ * Return: Pointer to a struct drm_pagemap_cache on success. Error pointer
+ * on failure.
+ */
+struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct drm_pagemap_shrinker *shrinker)
+{
+ struct drm_pagemap_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+ int err;
+
+ if (!cache)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&cache->lookup_mutex);
+ spin_lock_init(&cache->lock);
+ cache->shrinker = shrinker;
+ init_completion(&cache->queued);
+ err = devm_add_action_or_reset(shrinker->drm->dev, drm_pagemap_cache_fini, cache);
+ if (err)
+ return ERR_PTR(err);
+
+ return cache;
+}
+EXPORT_SYMBOL(drm_pagemap_cache_create_devm);
+
+/**
+ * DOC: Cache lookup
+ *
+ * Cache lookup should be done under a locked mutex, so that a
+ * failed drm_pagemap_get_from_cache() and a following
+ * drm_pagemap_cache_set_pagemap() are carried out as an atomic
+ * operation WRT other lookups. Otherwise, racing lookups may
+ * unnecessarily concurrently create pagemaps to fulfill a
+ * failed lookup. The API provides two functions to take and release this
+ * lock, drm_pagemap_cache_lock_lookup() and drm_pagemap_cache_unlock_lookup(),
+ * and they should be used in the following way:
+ *
+ * .. code-block:: c
+ *
+ * drm_pagemap_lock_lookup(cache);
+ * dpagemap = drm_pagemap_get_from_cache(cache);
+ * if (dpagemap)
+ * goto out_unlock;
+ *
+ * dpagemap = driver_create_new_dpagemap();
+ * if (!IS_ERR(dpagemap))
+ * drm_pagemap_cache_set_pagemap(cache, dpagemap);
+ *
+ * out_unlock:
+ * drm_pagemap_unlock_lookup(cache);
+ */
+
+/**
+ * drm_pagemap_cache_lock_lookup() - Lock a drm_pagemap_cache for lookup.
+ * @cache: The drm_pagemap_cache to lock.
+ *
+ * Return: %-EINTR if interrupted while blocking. %0 otherwise.
+ */
+int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache *cache)
+{
+ return mutex_lock_interruptible(&cache->lookup_mutex);
+}
+EXPORT_SYMBOL(drm_pagemap_cache_lock_lookup);
+
+/**
+ * drm_pagemap_cache_unlock_lookup() - Unlock a drm_pagemap_cache after lookup.
+ * @cache: The drm_pagemap_cache to unlock.
+ */
+void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache *cache)
+{
+ mutex_unlock(&cache->lookup_mutex);
+}
+EXPORT_SYMBOL(drm_pagemap_cache_unlock_lookup);
+
+/**
+ * drm_pagemap_get_from_cache() - Lookup of drm_pagemaps.
+ * @cache: The cache used for lookup.
+ *
+ * If an active pagemap is present in the cache, it is immediately returned.
+ * If an inactive pagemap is present, it's removed from the shrinker list and
+ * an attempt is made to make it active.
+ * If no pagemap is present or the attempt to make it active failed, %NULL is returned
+ * to indicate to the caller to create a new drm_pagemap and insert it into
+ * the cache.
+ *
+ * Return: A reference-counted pointer to a drm_pagemap if successful. An error
+ * pointer if an error occurred, or %NULL if no drm_pagemap was found and
+ * the caller should insert a new one.
+ */
+struct drm_pagemap *drm_pagemap_get_from_cache(struct drm_pagemap_cache *cache)
+{
+ struct drm_pagemap *dpagemap;
+ int err;
+
+ lockdep_assert_held(&cache->lookup_mutex);
+retry:
+ spin_lock(&cache->lock);
+ dpagemap = cache->dpagemap;
+ if (drm_pagemap_get_unless_zero(dpagemap)) {
+ spin_unlock(&cache->lock);
+ return dpagemap;
+ }
+
+ if (!dpagemap) {
+ spin_unlock(&cache->lock);
+ return NULL;
+ }
+
+ if (!try_wait_for_completion(&cache->queued)) {
+ spin_unlock(&cache->lock);
+ err = wait_for_completion_interruptible(&cache->queued);
+ if (err)
+ return ERR_PTR(err);
+ goto retry;
+ }
+
+ if (drm_pagemap_shrinker_cancel(dpagemap)) {
+ cache->dpagemap = NULL;
+ spin_unlock(&cache->lock);
+ err = drm_pagemap_reinit(dpagemap);
+ if (err) {
+ drm_pagemap_destroy(dpagemap, false);
+ return ERR_PTR(err);
+ }
+ drm_pagemap_cache_set_pagemap(cache, dpagemap);
+ } else {
+ cache->dpagemap = NULL;
+ spin_unlock(&cache->lock);
+ dpagemap = NULL;
+ }
+
+ return dpagemap;
+}
+EXPORT_SYMBOL(drm_pagemap_get_from_cache);
+
+/**
+ * drm_pagemap_cache_set_pagemap() - Assign a drm_pagemap to a drm_pagemap_cache
+ * @cache: The cache to assign the drm_pagemap to.
+ * @dpagemap: The drm_pagemap to assign.
+ *
+ * The function must be called to populate a drm_pagemap_cache only
+ * after a call to drm_pagemap_get_from_cache() returns NULL.
+ */
+void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache *cache, struct drm_pagemap *dpagemap)
+{
+ struct drm_device *drm = dpagemap->drm;
+
+ lockdep_assert_held(&cache->lookup_mutex);
+ spin_lock(&cache->lock);
+ dpagemap->cache = cache;
+ swap(cache->dpagemap, dpagemap);
+ reinit_completion(&cache->queued);
+ spin_unlock(&cache->lock);
+ drm_WARN_ON(drm, !!dpagemap);
+}
+EXPORT_SYMBOL(drm_pagemap_cache_set_pagemap);
+
+/**
+ * drm_pagemap_get_from_cache_if_active() - Quick lookup of active drm_pagemaps
+ * @cache: The cache to lookup from.
+ *
+ * Function that should be used to look up a drm_pagemap that is already
+ * active (refcount > 0).
+ *
+ * Return: A pointer to the cache's drm_pagemap if it's active; %NULL otherwise.
+ */
+struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct drm_pagemap_cache *cache)
+{
+ struct drm_pagemap *dpagemap;
+
+ spin_lock(&cache->lock);
+ dpagemap = drm_pagemap_get_unless_zero(cache->dpagemap);
+ spin_unlock(&cache->lock);
+
+ return dpagemap;
+}
+EXPORT_SYMBOL(drm_pagemap_get_from_cache_if_active);
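
A sketch of the intended fast path, e.g. from a fault handler that must not block on pagemap creation; everything around the call is hypothetical:

	struct drm_pagemap *dpagemap;

	dpagemap = drm_pagemap_get_from_cache_if_active(cache);
	if (!dpagemap)
		return -EAGAIN;	/* hypothetical: fall back to the locked lookup path */

	/* ... use dpagemap, then drop the reference ... */
	drm_pagemap_put(dpagemap);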
+
+static bool drm_pagemap_shrinker_cancel(struct drm_pagemap *dpagemap)
+{
+ struct drm_pagemap_cache *cache = dpagemap->cache;
+ struct drm_pagemap_shrinker *shrinker = cache->shrinker;
+
+ spin_lock(&shrinker->lock);
+ if (list_empty(&dpagemap->shrink_link)) {
+ spin_unlock(&shrinker->lock);
+ return false;
+ }
+
+ list_del_init(&dpagemap->shrink_link);
+ atomic_dec(&shrinker->num_dpagemaps);
+ spin_unlock(&shrinker->lock);
+ return true;
+}
+
+#ifdef CONFIG_PROVE_LOCKING
+/**
+ * drm_pagemap_shrinker_might_lock() - lockdep test for drm_pagemap_shrinker_add()
+ * @dpagemap: The drm pagemap.
+ *
+ * The drm_pagemap_shrinker_add() function performs some locking.
+ * This function can be called in code-paths that might
+ * call drm_pagemap_shrinker_add() to detect any lockdep problems early.
+ */
+void drm_pagemap_shrinker_might_lock(struct drm_pagemap *dpagemap)
+{
+ int idx;
+
+ if (drm_dev_enter(dpagemap->drm, &idx)) {
+ struct drm_pagemap_cache *cache = dpagemap->cache;
+
+ if (cache)
+ might_lock(&cache->shrinker->lock);
+
+ drm_dev_exit(idx);
+ }
+}
+#endif
+
+/**
+ * drm_pagemap_shrinker_add() - Add a drm_pagemap to the shrinker list or destroy
+ * @dpagemap: The drm_pagemap.
+ *
+ * If @dpagemap is associated with a &struct drm_pagemap_cache AND the
+ * struct device backing the drm device is still alive, add @dpagemap to
+ * the &struct drm_pagemap_shrinker list of shrinkable drm_pagemaps.
+ *
+ * Otherwise destroy the pagemap directly using drm_pagemap_destroy().
+ *
+ * This is an internal function which is not intended to be exposed to drivers.
+ */
+void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap)
+{
+ struct drm_pagemap_cache *cache;
+ struct drm_pagemap_shrinker *shrinker;
+ int idx;
+
+ /*
+ * The pagemap cache and shrinker are disabled at
+ * pci device remove time. After that, dpagemaps
+ * are freed directly.
+ */
+ if (!drm_dev_enter(dpagemap->drm, &idx))
+ goto out_no_cache;
+
+ cache = dpagemap->cache;
+ if (!cache) {
+ drm_dev_exit(idx);
+ goto out_no_cache;
+ }
+
+ shrinker = cache->shrinker;
+ spin_lock(&shrinker->lock);
+ list_add_tail(&dpagemap->shrink_link, &shrinker->dpagemaps);
+ atomic_inc(&shrinker->num_dpagemaps);
+ spin_unlock(&shrinker->lock);
+ complete_all(&cache->queued);
+ drm_dev_exit(idx);
+ return;
+
+out_no_cache:
+ drm_pagemap_destroy(dpagemap, true);
+}
+
+static unsigned long
+drm_pagemap_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct drm_pagemap_shrinker *shrinker = shrink->private_data;
+ unsigned long count = atomic_read(&shrinker->num_dpagemaps);
+
+ return count ? : SHRINK_EMPTY;
+}
+
+static unsigned long
+drm_pagemap_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct drm_pagemap_shrinker *shrinker = shrink->private_data;
+ struct drm_pagemap *dpagemap;
+ struct drm_pagemap_cache *cache;
+ unsigned long nr_freed = 0;
+
+ sc->nr_scanned = 0;
+ spin_lock(&shrinker->lock);
+ do {
+ dpagemap = list_first_entry_or_null(&shrinker->dpagemaps, typeof(*dpagemap),
+ shrink_link);
+ if (!dpagemap)
+ break;
+
+ atomic_dec(&shrinker->num_dpagemaps);
+ list_del_init(&dpagemap->shrink_link);
+ spin_unlock(&shrinker->lock);
+
+ sc->nr_scanned++;
+ nr_freed++;
+
+ cache = dpagemap->cache;
+ spin_lock(&cache->lock);
+ cache->dpagemap = NULL;
+ spin_unlock(&cache->lock);
+
+ drm_dbg(dpagemap->drm, "Shrinking dpagemap %p.\n", dpagemap);
+ drm_pagemap_destroy(dpagemap, true);
+ spin_lock(&shrinker->lock);
+ } while (sc->nr_scanned < sc->nr_to_scan);
+ spin_unlock(&shrinker->lock);
+
+ return sc->nr_scanned ? nr_freed : SHRINK_STOP;
+}
+
+static void drm_pagemap_shrinker_fini(void *arg)
+{
+ struct drm_pagemap_shrinker *shrinker = arg;
+
+ drm_dbg(shrinker->drm, "Destroying dpagemap shrinker.\n");
+ drm_WARN_ON(shrinker->drm, !!atomic_read(&shrinker->num_dpagemaps));
+ shrinker_free(shrinker->shrink);
+ kfree(shrinker);
+}
+
+/**
+ * drm_pagemap_shrinker_create_devm() - Create and register a pagemap shrinker
+ * @drm: The drm device
+ *
+ * Create and register a pagemap shrinker that shrinks unused pagemaps
+ * and thereby reduces memory footprint.
+ * The shrinker is drm_device managed and unregisters itself when
+ * the drm device is removed.
+ *
+ * Return: %0 on success, negative error code on failure.
+ */
+struct drm_pagemap_shrinker *drm_pagemap_shrinker_create_devm(struct drm_device *drm)
+{
+ struct drm_pagemap_shrinker *shrinker;
+ struct shrinker *shrink;
+ int err;
+
+ shrinker = kzalloc(sizeof(*shrinker), GFP_KERNEL);
+ if (!shrinker)
+ return ERR_PTR(-ENOMEM);
+
+ shrink = shrinker_alloc(0, "drm-drm_pagemap:%s", drm->unique);
+ if (!shrink) {
+ kfree(shrinker);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ spin_lock_init(&shrinker->lock);
+ INIT_LIST_HEAD(&shrinker->dpagemaps);
+ shrinker->drm = drm;
+ shrinker->shrink = shrink;
+ shrink->count_objects = drm_pagemap_shrinker_count;
+ shrink->scan_objects = drm_pagemap_shrinker_scan;
+ shrink->private_data = shrinker;
+ shrinker_register(shrink);
+
+ err = devm_add_action_or_reset(drm->dev, drm_pagemap_shrinker_fini, shrinker);
+ if (err)
+ return ERR_PTR(err);
+
+ return shrinker;
+}
+EXPORT_SYMBOL(drm_pagemap_shrinker_create_devm);
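
A probe-time wiring sketch under the devm model; the one-shrinker-per-device, one-cache-per-memory-region split shown here is an assumption, not mandated by the API:

	struct drm_pagemap_shrinker *shrinker;
	struct drm_pagemap_cache *cache;

	shrinker = drm_pagemap_shrinker_create_devm(drm);
	if (IS_ERR(shrinker))
		return PTR_ERR(shrinker);

	cache = drm_pagemap_cache_create_devm(shrinker);
	if (IS_ERR(cache))
		return PTR_ERR(cache);

	/* Both are torn down automatically via devm on device removal. */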
+
+/**
+ * struct drm_pagemap_owner - Device interconnect group
+ * @kref: Reference count.
+ *
+ * A struct drm_pagemap_owner identifies a device interconnect group.
+ */
+struct drm_pagemap_owner {
+ struct kref kref;
+};
+
+static void drm_pagemap_owner_release(struct kref *kref)
+{
+ kfree(container_of(kref, struct drm_pagemap_owner, kref));
+}
+
+/**
+ * drm_pagemap_release_owner() - Stop participating in an interconnect group
+ * @peer: Pointer to the struct drm_pagemap_peer used when joining the group
+ *
+ * Stop participating in an interconnect group. This function is typically
+ * called when a pagemap is removed to indicate that it doesn't need to
+ * be taken into account.
+ */
+void drm_pagemap_release_owner(struct drm_pagemap_peer *peer)
+{
+ struct drm_pagemap_owner_list *owner_list = peer->list;
+
+ if (!owner_list)
+ return;
+
+ mutex_lock(&owner_list->lock);
+ list_del(&peer->link);
+ kref_put(&peer->owner->kref, drm_pagemap_owner_release);
+ peer->owner = NULL;
+ mutex_unlock(&owner_list->lock);
+}
+EXPORT_SYMBOL(drm_pagemap_release_owner);
+
+/**
+ * typedef interconnect_fn - Callback function to identify fast interconnects
+ * @peer1: First endpoint.
+ * @peer2: Second endpoint.
+ *
+ * The function returns %true iff @peer1 and @peer2 have a fast interconnect.
+ * Note that this relation is symmetrical. The function has no notion of client
+ * and provider, which may not be sufficient in some cases. However, since the
+ * callback is intended to guide the assignment of common pagemap owners,
+ * supporting asymmetric interconnects would also require changing the notion
+ * that a common owner indicates a fast interconnect.
+ *
+ * Return: %true iff @peer1 and @peer2 have a fast interconnect. Otherwise %false.
+ */
+typedef bool (*interconnect_fn)(struct drm_pagemap_peer *peer1, struct drm_pagemap_peer *peer2);
+
+/**
+ * drm_pagemap_acquire_owner() - Join an interconnect group
+ * @peer: A struct drm_pagemap_peer keeping track of the device interconnect
+ * @owner_list: Pointer to the owner_list, keeping track of all interconnects
+ * @has_interconnect: Callback function to determine whether two peers have a
+ * fast local interconnect.
+ *
+ * Repeatedly calls @has_interconnect for @peer and other peers on @owner_list to
+ * determine a set of peers for which @peer has a fast interconnect. That set will
+ * have common &struct drm_pagemap_owner, and upon successful return, @peer::owner
+ * will point to that struct, holding a reference, and @peer will be registered in
+ * @owner_list. If @peer doesn't have any fast interconnects to other @peers, a
+ * new unique &struct drm_pagemap_owner will be allocated for it, and that
+ * may be shared with other peers that, at a later point, are determined to have
+ * a fast interconnect with @peer.
+ *
+ * When @peer no longer participates in an interconnect group,
+ * drm_pagemap_release_owner() should be called to drop the reference on the
+ * struct drm_pagemap_owner.
+ *
+ * Return: %0 on success, negative error code on failure.
+ */
+int drm_pagemap_acquire_owner(struct drm_pagemap_peer *peer,
+ struct drm_pagemap_owner_list *owner_list,
+ interconnect_fn has_interconnect)
+{
+ struct drm_pagemap_peer *cur_peer;
+ struct drm_pagemap_owner *owner = NULL;
+ bool interconnect = false;
+
+ mutex_lock(&owner_list->lock);
+ might_alloc(GFP_KERNEL);
+ list_for_each_entry(cur_peer, &owner_list->peers, link) {
+ if (cur_peer->owner != owner) {
+ if (owner && interconnect)
+ break;
+ owner = cur_peer->owner;
+ interconnect = true;
+ }
+ if (interconnect && !has_interconnect(peer, cur_peer))
+ interconnect = false;
+ }
+
+ if (!interconnect) {
+ owner = kmalloc(sizeof(*owner), GFP_KERNEL);
+ if (!owner) {
+ mutex_unlock(&owner_list->lock);
+ return -ENOMEM;
+ }
+ kref_init(&owner->kref);
+ list_add_tail(&peer->link, &owner_list->peers);
+ } else {
+ kref_get(&owner->kref);
+ list_add_tail(&peer->link, &cur_peer->link);
+ }
+ peer->owner = owner;
+ peer->list = owner_list;
+ mutex_unlock(&owner_list->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(drm_pagemap_acquire_owner);
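
A sketch of a driver-side interconnect callback and the acquire/release pairing; my_peer_to_dev(), the fabric_id field and the owner_list instance are hypothetical:

	static bool my_has_interconnect(struct drm_pagemap_peer *peer1,
					struct drm_pagemap_peer *peer2)
	{
		/* Hypothetical criterion: both devices sit on the same fabric. */
		return my_peer_to_dev(peer1)->fabric_id ==
		       my_peer_to_dev(peer2)->fabric_id;
	}

	/* At pagemap creation, with owner_list initialized elsewhere: */
	err = drm_pagemap_acquire_owner(&my_dev->peer, &owner_list,
					my_has_interconnect);
	if (err)
		return err;

	/* At pagemap removal: */
	drm_pagemap_release_owner(&my_dev->peer);

The resulting common &struct drm_pagemap_owner is intended to serve as the dev_pagemap owner, marking the peers' device-private pages as mutually compatible.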
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 7f08b4cd91d6..8dcc85cb8d42 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -98,6 +98,7 @@ xe-y += xe_bb.o \
xe_page_reclaim.o \
xe_pat.o \
xe_pci.o \
+ xe_pci_rebar.o \
xe_pcode.o \
xe_pm.o \
xe_preempt_fence.o \
@@ -116,6 +117,7 @@ xe-y += xe_bb.o \
xe_sa.o \
xe_sched_job.o \
xe_shrinker.o \
+ xe_soc_remapper.o \
xe_step.o \
xe_survivability_mode.o \
xe_sync.o \
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index 89a4f8c504e6..e33bd622ab44 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -8,6 +8,8 @@
#include <linux/types.h>
+#include "abi/guc_scheduler_abi.h"
+
/**
* DOC: GuC KLV
*
@@ -46,11 +48,18 @@
* Refers to 32 bit architecture version as reported by the HW IP.
* This key is supported on MTL+ platforms only.
* Requires GuC ABI 1.2+.
+ *
+ * _`GUC_KLV_GLOBAL_CFG_GROUP_SCHEDULING_AVAILABLE` : 0x3001
+ * Tells the driver whether scheduler groups are enabled or not.
+ * Requires GuC ABI 1.26+
*/
#define GUC_KLV_GLOBAL_CFG_GMD_ID_KEY 0x3000u
#define GUC_KLV_GLOBAL_CFG_GMD_ID_LEN 1u
+#define GUC_KLV_GLOBAL_CFG_GROUP_SCHEDULING_AVAILABLE_KEY 0x3001u
+#define GUC_KLV_GLOBAL_CFG_GROUP_SCHEDULING_AVAILABLE_LEN 1u
+
/**
* DOC: GuC Self Config KLVs
*
@@ -200,6 +209,20 @@ enum {
* :0: adverse events are not counted (default)
* :n: sample period in milliseconds
*
+ * _`GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG` : 0x8004
+ * This config allows the PF to split the engines across scheduling groups.
+ * Each group is independently timesliced across VFs, allowing different
+ * VFs to be active on the HW at the same time. When enabling this feature,
+ * all engines must be assigned to a group (and only one group), or they
+ * will be excluded from scheduling after this KLV is sent. To enable
+ * the groups, the driver must provide a masks array with
+ * GUC_MAX_ENGINE_CLASSES entries for each group, with each mask indicating
+ * which logical instances of that class belong to the group. Therefore,
+ * the length of this KLV when enabling groups is
+ * num_groups * GUC_MAX_ENGINE_CLASSES. To disable the groups, the driver
+ * must send the KLV without any payload (i.e. len = 0). The maximum
+ * number of groups is 8.
+ *
* _`GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH` : 0x8D00
* This enum is to reset utilized HW engine after VF Switch (i.e to clean
* up Stale HW register left behind by previous VF)
@@ -214,6 +237,12 @@ enum {
#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY 0x8002
#define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_LEN 1u
+#define GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG_KEY 0x8004
+#define GUC_KLV_VGT_POLICY_ENGINE_GROUP_MAX_COUNT GUC_MAX_SCHED_GROUPS
+#define GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG_MIN_LEN 0
+#define GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG_MAX_LEN \
+ (GUC_KLV_VGT_POLICY_ENGINE_GROUP_MAX_COUNT * GUC_MAX_ENGINE_CLASSES)
+
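
As a worked example of the length rule above (hypothetical values; class index names assumed from the driver's existing GuC engine-class defines): splitting four compute instances into two groups takes num_groups * GUC_MAX_ENGINE_CLASSES mask entries:

	u32 masks[2 * GUC_MAX_ENGINE_CLASSES] = {};

	/* Group 0: compute instances 0-1. Group 1: compute instances 2-3. */
	masks[0 * GUC_MAX_ENGINE_CLASSES + GUC_COMPUTE_CLASS] = GENMASK(1, 0);
	masks[1 * GUC_MAX_ENGINE_CLASSES + GUC_COMPUTE_CLASS] = GENMASK(3, 2);

	/* KLV len = 2 * GUC_MAX_ENGINE_CLASSES; sending len = 0 disables the groups. */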
#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY 0x8D00
#define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_LEN 1u
@@ -268,6 +297,10 @@ enum {
* it to take effect. Such cases might typically happen on a 1PF+1VF
* Virtualization config enabled for heavier workloads like AI/ML.
*
+ * If scheduling groups are supported, the provided value is applied to all
+ * groups (even if they've not yet been enabled). Support for this feature
+ * is available from GuC 70.53.0.
+ *
* The max value for this KLV is 100 seconds, anything exceeding that
* will be clamped to the max.
*
@@ -290,6 +323,10 @@ enum {
* on a 1PF+1VF Virtualization config enabled for heavier workloads like
* AI/ML.
*
+ * If scheduling groups are supported, the provided value is applied to all
+ * groups (even if they've not yet been enabled). Support for this feature
+ * is available from GuC 70.53.0.
+ *
* The max value for this KLV is 100 seconds, anything exceeding that
* will be clamped to the max.
*
@@ -358,6 +395,26 @@ enum {
* groups and cause the latter to be turned off when registered with the
* GuC, this config allows the PF to set a threshold for multi-LRC context
* registrations by VFs to monitor their behavior.
+ *
+ * _`GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM` : 0x8A0E
+ * This config sets the VFs-execution-quantum for each scheduling group in
+ * milliseconds. The driver must provide an array of values, with each of
+ * them matching the respective group index (first value goes to group 0,
+ * second to group 1, etc). The setting of group values follows the same
+ * behavior and rules as setting via GUC_KLV_VF_CFG_EXEC_QUANTUM. Note that
+ * the GuC always sets the EQ for all groups (even the non-enabled ones),
+ * so if we provide fewer values than the max the GuC will use 0 for the
+ * remaining groups. This KLV is available starting from GuC 70.53.0.
+ *
+ * _`GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT` : 0x8A0F
+ * This config sets the VFs-preemption-timeout for each scheduling group in
+ * microseconds. The driver must provide an array of values, with each of
+ * them matching the respective group index (first value goes to group 0,
+ * second to group 1, etc). The setting of group values follows the same
+ * behavior and rules as setting via GUC_KLV_VF_CFG_PREEMPT_TIMEOUT. Note
+ * that the GuC always sets the PT for all groups (even the non-enabled
+ * ones), so if we provide fewer values than the max the GuC will use 0 for
+ * the remaining groups. This KLV is available starting from GuC 70.53.0.
*/
#define GUC_KLV_VF_CFG_GGTT_START_KEY 0x0001
@@ -419,6 +476,13 @@ enum {
#define GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT_KEY 0x8a0d
#define GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT_LEN 1u
+#define GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_KEY 0x8a0e
+#define GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_MIN_LEN 1u
+#define GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_MAX_LEN GUC_MAX_SCHED_GROUPS
+
+#define GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_KEY 0x8a0f
+#define GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_MIN_LEN 1u
+#define GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_MAX_LEN GUC_MAX_SCHED_GROUPS
/*
* Workaround keys:
*/
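
Illustrative sketch of encoding the ENGINE_GROUP_CONFIG KLV documented
above. The helper below is hypothetical (only the key, the length rules
and the guc_sched_group layout come from this patch); a zero-length
payload disables the groups:

    /* Returns the number of dwords written into @klv. */
    static u32 example_encode_engine_group_config(u32 *klv,
    					      const struct guc_sched_group *groups,
    					      u32 num_groups)
    {
    	u32 len = num_groups * GUC_MAX_ENGINE_CLASSES;

    	klv[0] = PREP_GUC_KLV(GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG_KEY, len);
    	memcpy(&klv[1], groups, len * sizeof(u32));	/* no-op when disabling */

    	return GUC_KLV_LEN_MIN + len;
    }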
diff --git a/drivers/gpu/drm/xe/abi/guc_scheduler_abi.h b/drivers/gpu/drm/xe/abi/guc_scheduler_abi.h
new file mode 100644
index 000000000000..513b22a87428
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_scheduler_abi.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_SCHEDULER_ABI_H
+#define _ABI_GUC_SCHEDULER_ABI_H
+
+#include <linux/types.h>
+
+/*
+ * Generic defines required for registration with and submissions to the GuC
+ * scheduler. Includes engine class/instance defines and context attributes
+ * (id, priority, etc.).
+ */
+
+/* Engine classes/instances */
+#define GUC_RENDER_CLASS 0
+#define GUC_VIDEO_CLASS 1
+#define GUC_VIDEOENHANCE_CLASS 2
+#define GUC_BLITTER_CLASS 3
+#define GUC_COMPUTE_CLASS 4
+#define GUC_GSC_OTHER_CLASS 5
+#define GUC_LAST_ENGINE_CLASS GUC_GSC_OTHER_CLASS
+#define GUC_MAX_ENGINE_CLASSES 16
+#define GUC_MAX_INSTANCES_PER_CLASS 32
+
+/* context priority values */
+#define GUC_CLIENT_PRIORITY_KMD_HIGH 0
+#define GUC_CLIENT_PRIORITY_HIGH 1
+#define GUC_CLIENT_PRIORITY_KMD_NORMAL 2
+#define GUC_CLIENT_PRIORITY_NORMAL 3
+#define GUC_CLIENT_PRIORITY_NUM 4
+
+/* Context registration */
+#define GUC_ID_MAX 65535
+#define GUC_ID_UNKNOWN 0xffffffff
+
+#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
+#define CONTEXT_REGISTRATION_FLAG_TYPE GENMASK(2, 1)
+#define GUC_CONTEXT_NORMAL 0
+#define GUC_CONTEXT_COMPRESSION_SAVE 1
+#define GUC_CONTEXT_COMPRESSION_RESTORE 2
+#define GUC_CONTEXT_COUNT (GUC_CONTEXT_COMPRESSION_RESTORE + 1)
+
+/* context enable/disable */
+#define GUC_CONTEXT_DISABLE 0
+#define GUC_CONTEXT_ENABLE 1
+
+/* scheduler groups */
+#define GUC_MAX_SCHED_GROUPS 8
+
+struct guc_sched_group {
+ u32 engines[GUC_MAX_ENGINE_CLASSES];
+} __packed;
+
+#endif
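
As a hypothetical illustration of the mask layout (values invented, not
from this patch): a group owning RCS0 plus CCS0-1, indexed by GuC class
and populated with logical-instance bits, would look like:

    struct guc_sched_group example_group = {
    	.engines = {
    		[GUC_RENDER_CLASS]  = BIT(0),
    		[GUC_COMPUTE_CLASS] = BIT(0) | BIT(1),
    	},
    };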
diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h
index 0f79c0714454..240d57993ea6 100644
--- a/drivers/gpu/drm/xe/regs/xe_pmt.h
+++ b/drivers/gpu/drm/xe/regs/xe_pmt.h
@@ -18,9 +18,6 @@
#define BMG_TELEMETRY_BASE_OFFSET 0xE0000
#define BMG_TELEMETRY_OFFSET (SOC_BASE + BMG_TELEMETRY_BASE_OFFSET)
-#define SG_REMAP_INDEX1 XE_REG(SOC_BASE + 0x08)
-#define SG_REMAP_BITS REG_GENMASK(31, 24)
-
#define BMG_MODS_RESIDENCY_OFFSET (0x4D0)
#define BMG_G2_RESIDENCY_OFFSET (0x530)
#define BMG_G6_RESIDENCY_OFFSET (0x538)
diff --git a/drivers/gpu/drm/xe/regs/xe_soc_remapper_regs.h b/drivers/gpu/drm/xe/regs/xe_soc_remapper_regs.h
new file mode 100644
index 000000000000..be0eb37e73ad
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_soc_remapper_regs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+#ifndef _XE_SOC_REMAPPER_REGS_H_
+#define _XE_SOC_REMAPPER_REGS_H_
+
+#include "xe_regs.h"
+
+#define SG_REMAP_INDEX1 XE_REG(SOC_BASE + 0x08)
+#define SG_REMAP_TELEM_MASK REG_GENMASK(31, 24)
+#define SG_REMAP_SYSCTRL_MASK REG_GENMASK(23, 16)
+
+#endif
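
The remapper implementation itself lives in xe_soc_remapper.c, which is
not shown in this range. A minimal sketch of how the index field might be
programmed, assuming the usual xe MMIO helpers and the soc_remapper.lock
added to xe_device_types.h below:

    /* Sketch only: the index register is shared between the telemetry and
     * system-controller selectors, hence the lock.
     */
    static void example_set_telem_region(struct xe_device *xe, u32 index)
    {
    	guard(spinlock)(&xe->soc_remapper.lock);

    	xe_mmio_rmw32(xe_root_tile_mmio(xe), SG_REMAP_INDEX1,
    		      SG_REMAP_TELEM_MASK,
    		      REG_FIELD_PREP(SG_REMAP_TELEM_MASK, index));
    }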
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 00afc84a8683..e101d290b2a6 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -16,6 +16,7 @@
#include <drm/drm_gem_ttm_helper.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_managed.h>
+#include <drm/drm_pagemap_util.h>
#include <drm/drm_print.h>
#include <uapi/drm/xe_drm.h>
@@ -61,8 +62,10 @@
#include "xe_pxp.h"
#include "xe_query.h"
#include "xe_shrinker.h"
+#include "xe_soc_remapper.h"
#include "xe_survivability_mode.h"
#include "xe_sriov.h"
+#include "xe_svm.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
@@ -376,6 +379,20 @@ static const struct file_operations xe_driver_fops = {
.fop_flags = FOP_UNSIGNED_OFFSET,
};
+/**
+ * xe_is_xe_file() - Is the file an xe device file?
+ * @file: The file.
+ *
+ * Checks whether the file is opened against an xe device.
+ *
+ * Return: %true if an xe file, %false if not.
+ */
+bool xe_is_xe_file(const struct file *file)
+{
+ return file->f_op == &xe_driver_fops;
+}
+
static struct drm_driver driver = {
/* Don't use MTRRs here; the Xserver or userspace app should
* deal with them for Intel hardware.
@@ -472,6 +489,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
init_rwsem(&xe->usm.lock);
+ err = xe_pagemap_shrinker_create(xe);
+ if (err)
+ goto err;
+
xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);
if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
@@ -969,6 +990,10 @@ int xe_device_probe(struct xe_device *xe)
xe_nvm_init(xe);
+ err = xe_soc_remapper_init(xe);
+ if (err)
+ return err;
+
err = xe_heci_gsc_init(xe);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 6604b89330d5..3e72fa4609f8 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -200,6 +200,8 @@ void xe_file_put(struct xe_file *xef);
int xe_is_injection_active(void);
+bool xe_is_xe_file(const struct file *file);
+
/*
* Occasionally it is seen that the G2H worker starts running after a delay of more than
* a second even after being queued and activated by the Linux workqueue subsystem. This
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index dad355fec50c..a85be9ba175e 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -36,6 +36,8 @@
#define TEST_VM_OPS_ERROR
#endif
+struct dram_info;
+struct drm_pagemap_shrinker;
struct intel_display;
struct intel_dg_nvm_dev;
struct xe_ggtt;
@@ -332,6 +334,10 @@ struct xe_device {
u8 has_pxp:1;
/** @info.has_range_tlb_inval: Has range based TLB invalidations */
u8 has_range_tlb_inval:1;
+ /** @info.has_soc_remapper_sysctrl: Has SoC remapper system controller */
+ u8 has_soc_remapper_sysctrl:1;
+ /** @info.has_soc_remapper_telem: Has SoC remapper telemetry support */
+ u8 has_soc_remapper_telem:1;
/** @info.has_sriov: Supports SR-IOV */
u8 has_sriov:1;
/** @info.has_usm: Device has unified shared memory support */
@@ -449,6 +455,10 @@ struct xe_device {
#define XE_PAGEFAULT_QUEUE_COUNT 4
/** @usm.pf_queue: Page fault queues */
struct xe_pagefault_queue pf_queue[XE_PAGEFAULT_QUEUE_COUNT];
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
+ /** @usm.pagemap_shrinker: Shrinker for unused pagemaps */
+ struct drm_pagemap_shrinker *dpagemap_shrinker;
+#endif
} usm;
/** @pinned: pinned BO state */
@@ -572,6 +582,18 @@ struct xe_device {
struct mutex lock;
} pmt;
+ /** @soc_remapper: SoC remapper object */
+ struct {
+ /** @soc_remapper.lock: Serialize access to SoC Remapper's index registers */
+ spinlock_t lock;
+
+ /** @soc_remapper.set_telem_region: Set telemetry index */
+ void (*set_telem_region)(struct xe_device *xe, u32 index);
+
+ /** @soc_remapper.set_sysctrl_region: Set system controller index */
+ void (*set_sysctrl_region)(struct xe_device *xe, u32 index);
+ } soc_remapper;
+
/**
* @pm_callback_task: Track the active task that is running in either
* the runtime_suspend or runtime_resume callbacks.
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 41023a464480..0b9e074b022f 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -17,6 +17,7 @@
#include "xe_dep_scheduler.h"
#include "xe_device.h"
#include "xe_gt.h"
+#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_vf.h"
#include "xe_hw_engine_class_sysfs.h"
#include "xe_hw_engine_group.h"
@@ -1108,6 +1109,17 @@ static u32 calc_validate_logical_mask(struct xe_device *xe,
return return_mask;
}
+static bool has_sched_groups(struct xe_gt *gt)
+{
+ if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_sriov_pf_sched_groups_enabled(gt))
+ return true;
+
+ if (IS_SRIOV_VF(gt_to_xe(gt)) && xe_gt_sriov_vf_sched_groups_enabled(gt))
+ return true;
+
+ return false;
+}
+
int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
@@ -1200,6 +1212,13 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
return -ENOENT;
}
+	/* SRIOV sched groups are not compatible with multi-LRC */
+ if (XE_IOCTL_DBG(xe, args->width > 1 && has_sched_groups(hwe->gt))) {
+ up_read(&vm->lock);
+ xe_vm_put(vm);
+ return -EINVAL;
+ }
+
q = xe_exec_queue_create(xe, vm, logical_mask,
args->width, hwe, flags,
args->extensions);
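
From the uAPI side this means a parallel submission request fails early
while groups are active; a hypothetical userspace call (field names from
the existing xe uAPI):

    struct drm_xe_exec_queue_create create = {
    	.width = 2,		/* multi-LRC */
    	.num_placements = 1,
    	/* ... */
    };

    /* returns -1 with errno == EINVAL while sched groups are enabled */
    ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);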
diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
index 247e41c1c48d..e7a50b1348b7 100644
--- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c
+++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
@@ -195,7 +195,8 @@ int main(int argc, const char *argv[])
}
}
- fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn, prefix, prefix);
+ fprintf(args[ARGS_CHEADER].f, HEADER, xbasename(args[ARGS_INPUT].fn),
+ prefix, prefix);
ret = parse(args[ARGS_INPUT].f, args[ARGS_CSOURCE].f,
args[ARGS_CHEADER].f, prefix);
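
xbasename() is not visible in this hunk; presumably it just strips the
directory part so the generated header no longer embeds a build-machine
path. A minimal sketch under that assumption:

    static const char *xbasename(const char *path)
    {
    	const char *slash = strrchr(path, '/');

    	return slash ? slash + 1 : path;
    }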
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 94969ddd9d88..de7e47763411 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -20,7 +20,17 @@
for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \
xe_hw_engine_is_valid((hwe__)))
-#define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0)
+#define XE_ENGINE_INSTANCES_FROM_MASK(gt, NAME) \
+ (((gt)->info.engine_mask & XE_HW_ENGINE_##NAME##_MASK) >> XE_HW_ENGINE_##NAME##0)
+
+#define RCS_INSTANCES(gt) XE_ENGINE_INSTANCES_FROM_MASK(gt, RCS)
+#define VCS_INSTANCES(gt) XE_ENGINE_INSTANCES_FROM_MASK(gt, VCS)
+#define VECS_INSTANCES(gt) XE_ENGINE_INSTANCES_FROM_MASK(gt, VECS)
+#define CCS_INSTANCES(gt) XE_ENGINE_INSTANCES_FROM_MASK(gt, CCS)
+#define GSCCS_INSTANCES(gt) XE_ENGINE_INSTANCES_FROM_MASK(gt, GSCCS)
+
+/* Our devices have up to 4 media slices */
+#define MAX_MEDIA_SLICES 4
#define GT_VER(gt) ({ \
typeof(gt) gt_ = (gt); \
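
For example, CCS_INSTANCES(gt) expands to

    (gt->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0

so on a hypothetical part with only CCS0 and CCS2 fused in it evaluates
to 0b101 and hweight32(CCS_INSTANCES(gt)) == 2, matching the old
CCS_MASK() behavior while generalizing to the other engine classes.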
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
index 50fffc9ebf62..91ac22ef5703 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -17,7 +17,7 @@
static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
{
u32 mode = CCS_MODE_CSLICE_0_3_MASK; /* disable all by default */
- int num_slices = hweight32(CCS_MASK(gt));
+ int num_slices = hweight32(CCS_INSTANCES(gt));
struct xe_device *xe = gt_to_xe(gt);
int width, cslice = 0;
u32 config = 0;
@@ -59,7 +59,7 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
config |= BIT(hwe->instance) << XE_HW_ENGINE_CCS0;
/* If a slice is fused off, leave disabled */
- while ((CCS_MASK(gt) & BIT(cslice)) == 0)
+ while ((CCS_INSTANCES(gt) & BIT(cslice)) == 0)
cslice++;
mode &= ~CCS_MODE_CSLICE(cslice, CCS_MODE_CSLICE_MASK);
@@ -94,7 +94,7 @@ num_cslices_show(struct device *kdev,
{
struct xe_gt *gt = kobj_to_gt(&kdev->kobj);
- return sysfs_emit(buf, "%u\n", hweight32(CCS_MASK(gt)));
+ return sysfs_emit(buf, "%u\n", hweight32(CCS_INSTANCES(gt)));
}
static DEVICE_ATTR_RO(num_cslices);
@@ -131,7 +131,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr,
* Ensure number of engines specified is valid and there is an
* exact multiple of engines for slices.
*/
- num_slices = hweight32(CCS_MASK(gt));
+ num_slices = hweight32(CCS_INSTANCES(gt));
if (!num_engines || num_engines > num_slices || num_slices % num_engines) {
xe_gt_dbg(gt, "Invalid compute config, %d engines %d slices\n",
num_engines, num_slices);
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
index f8779852cf0d..ef3b853f5c8c 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
@@ -17,7 +17,7 @@ int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt);
static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt)
{
/* Check if there are more than one compute engines available */
- return hweight32(CCS_MASK(gt)) > 1;
+ return hweight32(CCS_INSTANCES(gt)) > 1;
}
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index 0714c758b9c1..fb5c9101e275 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -14,6 +14,7 @@
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_helpers.h"
#include "xe_gt_sriov_pf_migration.h"
+#include "xe_gt_sriov_pf_policy.h"
#include "xe_gt_sriov_pf_service.h"
#include "xe_gt_sriov_printk.h"
#include "xe_guc_submit.h"
@@ -123,6 +124,8 @@ int xe_gt_sriov_pf_init(struct xe_gt *gt)
if (err)
return err;
+ xe_gt_sriov_pf_policy_init(gt);
+
err = xe_gt_sriov_pf_migration_init(gt);
if (err)
return err;
@@ -281,3 +284,20 @@ int xe_gt_sriov_pf_wait_ready(struct xe_gt *gt)
pf_flush_restart(gt);
return 0;
}
+
+/**
+ * xe_gt_sriov_pf_sched_groups_enabled() - Check if multiple scheduler groups are
+ * enabled
+ * @gt: the &xe_gt
+ *
+ * This function is for PF use only.
+ *
+ * Return: true if sched groups are enabled, false otherwise.
+ */
+bool xe_gt_sriov_pf_sched_groups_enabled(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ return xe_gt_sriov_pf_policy_sched_groups_enabled(gt);
+}
+
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
index e7fde3f9937a..1ccfc7137b98 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
@@ -6,6 +6,8 @@
#ifndef _XE_GT_SRIOV_PF_H_
#define _XE_GT_SRIOV_PF_H_
+#include <linux/types.h>
+
struct xe_gt;
#ifdef CONFIG_PCI_IOV
@@ -16,6 +18,7 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt);
void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid);
void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt);
void xe_gt_sriov_pf_restart(struct xe_gt *gt);
+bool xe_gt_sriov_pf_sched_groups_enabled(struct xe_gt *gt);
#else
static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
{
@@ -38,6 +41,11 @@ static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt)
static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt)
{
}
+
+static inline bool xe_gt_sriov_pf_sched_groups_enabled(struct xe_gt *gt)
+{
+ return false;
+}
#endif
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 6e8507c24986..5a870914b102 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -195,6 +195,25 @@ static int pf_push_vf_cfg_dbs(struct xe_gt *gt, unsigned int vfid, u32 begin, u3
return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs));
}
+static int pf_push_vf_grp_cfg_u32(struct xe_gt *gt, unsigned int vfid,
+ u16 key, const u32 *values, u32 count)
+{
+ CLASS(xe_guc_buf, buf)(&gt->uc.guc.buf, GUC_KLV_LEN_MIN + GUC_MAX_SCHED_GROUPS);
+ u32 *klv;
+
+ xe_gt_assert(gt, count && count <= GUC_MAX_SCHED_GROUPS);
+
+ if (!xe_guc_buf_is_valid(buf))
+ return -ENOBUFS;
+
+ klv = xe_guc_buf_cpu_ptr(buf);
+
+ klv[0] = FIELD_PREP(GUC_KLV_0_KEY, key) | FIELD_PREP(GUC_KLV_0_LEN, count);
+ memcpy(&klv[1], values, count * sizeof(u32));
+
+ return pf_push_vf_buf_klvs(gt, vfid, 1, buf, GUC_KLV_LEN_MIN + count);
+}
+
static int pf_push_vf_cfg_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 *exec_quantum)
{
/* GuC will silently clamp values exceeding max */
@@ -268,6 +287,37 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config,
return encode_ggtt(cfg, node->base.start, node->base.size, details);
}
+static u32 encode_config_sched(struct xe_gt *gt, u32 *cfg, u32 n,
+ const struct xe_gt_sriov_config *config)
+{
+ int i;
+
+ if (xe_sriov_gt_pf_policy_has_multi_group_modes(gt)) {
+ BUILD_BUG_ON(ARRAY_SIZE(config->exec_quantum) >
+ GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_MAX_LEN);
+ BUILD_BUG_ON(ARRAY_SIZE(config->preempt_timeout) >
+ GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_MAX_LEN);
+
+ cfg[n++] = PREP_GUC_KLV_CONST(GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_KEY,
+ ARRAY_SIZE(config->exec_quantum));
+ for (i = 0; i < ARRAY_SIZE(config->exec_quantum); i++)
+ cfg[n++] = config->exec_quantum[i];
+
+ cfg[n++] = PREP_GUC_KLV_CONST(GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_KEY,
+ ARRAY_SIZE(config->preempt_timeout));
+ for (i = 0; i < ARRAY_SIZE(config->preempt_timeout); i++)
+ cfg[n++] = config->preempt_timeout[i];
+ } else {
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM);
+ cfg[n++] = config->exec_quantum[0];
+
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_PREEMPT_TIMEOUT);
+ cfg[n++] = config->preempt_timeout[0];
+ }
+
+ return n;
+}
+
/* Return: number of configuration dwords written */
static u32 encode_config(struct xe_gt *gt, u32 *cfg, const struct xe_gt_sriov_config *config,
bool details)
@@ -298,11 +348,7 @@ static u32 encode_config(struct xe_gt *gt, u32 *cfg, const struct xe_gt_sriov_co
cfg[n++] = upper_32_bits(xe_bo_size(config->lmem_obj));
}
- cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM);
- cfg[n++] = config->exec_quantum;
-
- cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_PREEMPT_TIMEOUT);
- cfg[n++] = config->preempt_timeout;
+ n = encode_config_sched(gt, cfg, n, config);
#define encode_threshold_config(TAG, NAME, VER...) ({ \
if (IF_ARGS(GUC_FIRMWARE_VER_AT_LEAST(&gt->uc.guc, VER), true, VER)) { \
@@ -976,6 +1022,33 @@ static int pf_config_set_u32_done(struct xe_gt *gt, unsigned int vfid, u32 value
return 0;
}
+static char *to_group_name(const char *what, u8 group, char *buf, size_t size)
+{
+ snprintf(buf, size, "group%u%s%s", group, what ? " " : "", what ?: "");
+ return buf;
+}
+
+static int
+pf_groups_cfg_set_u32_done(struct xe_gt *gt, unsigned int vfid, u32 *values, u32 count,
+ void (*get_actual)(struct xe_gt *, unsigned int, u32 *, u32),
+ const char *what, const char *(*unit)(u32), int err)
+{
+ u32 actual[GUC_MAX_SCHED_GROUPS];
+ char group_name[32];
+ u8 g;
+
+ xe_gt_assert(gt, count <= ARRAY_SIZE(actual));
+
+ get_actual(gt, vfid, actual, count);
+
+ for (g = 0; g < count; g++)
+ pf_config_set_u32_done(gt, vfid, values[g], actual[g],
+ to_group_name(what, g, group_name, sizeof(group_name)),
+ unit, err);
+
+ return err;
+}
+
/**
* xe_gt_sriov_pf_config_set_ctxs - Configure GuC contexts IDs quota for the VF.
* @gt: the &xe_gt
@@ -1860,12 +1933,15 @@ static int pf_provision_exec_quantum(struct xe_gt *gt, unsigned int vfid,
{
struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
int err;
+ int i;
err = pf_push_vf_cfg_exec_quantum(gt, vfid, &exec_quantum);
if (unlikely(err))
return err;
- config->exec_quantum = exec_quantum;
+ for (i = 0; i < ARRAY_SIZE(config->exec_quantum); i++)
+ config->exec_quantum[i] = exec_quantum;
+
return 0;
}
@@ -1873,7 +1949,7 @@ static u32 pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid)
{
struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
- return config->exec_quantum;
+ return config->exec_quantum[0];
}
/**
@@ -1980,6 +2056,88 @@ int xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(struct xe_gt *gt, u32 exe
exec_quantum_unit, n, err);
}
+static int pf_provision_groups_exec_quantums(struct xe_gt *gt, unsigned int vfid,
+ const u32 *exec_quantums, u32 count)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+ int err;
+ int i;
+
+ err = pf_push_vf_grp_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_KEY,
+ exec_quantums, count);
+ if (unlikely(err))
+ return err;
+
+ /*
+ * GuC silently clamps values exceeding the max and zeroes out the
+	 * quantum for groups not in the KLV payload
+ */
+ for (i = 0; i < ARRAY_SIZE(config->exec_quantum); i++) {
+ if (i < count)
+ config->exec_quantum[i] = min_t(u32, exec_quantums[i],
+ GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE);
+ else
+ config->exec_quantum[i] = 0;
+ }
+
+ return 0;
+}
+
+static void pf_get_groups_exec_quantums(struct xe_gt *gt, unsigned int vfid,
+ u32 *exec_quantums, u32 max_count)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+ u32 count = min_t(u32, max_count, ARRAY_SIZE(config->exec_quantum));
+
+ memcpy(exec_quantums, config->exec_quantum, sizeof(u32) * count);
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_groups_exec_quantums() - Configure PF/VF EQs for sched groups.
+ * @gt: the &xe_gt
+ * @vfid: the PF or VF identifier
+ * @exec_quantums: array of requested EQs in milliseconds (0 is infinity)
+ * @count: number of entries in the array
+ *
+ * This function can only be called on PF.
+ * It will log the provisioned value or an error in case of failure.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_groups_exec_quantums(struct xe_gt *gt, unsigned int vfid,
+ u32 *exec_quantums, u32 count)
+{
+ int err;
+
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
+
+ err = pf_provision_groups_exec_quantums(gt, vfid, exec_quantums, count);
+
+ return pf_groups_cfg_set_u32_done(gt, vfid, exec_quantums, count,
+ pf_get_groups_exec_quantums,
+ "execution quantum",
+ exec_quantum_unit, err);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_groups_exec_quantums() - Get PF/VF sched groups EQs
+ * @gt: the &xe_gt
+ * @vfid: the PF or VF identifier
+ * @exec_quantums: array in which to store the execution quantums values
+ * @count: maximum number of entries to store
+ *
+ * This function can only be called on PF.
+ */
+void xe_gt_sriov_pf_config_get_groups_exec_quantums(struct xe_gt *gt, unsigned int vfid,
+ u32 *exec_quantums, u32 count)
+{
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
+
+ xe_gt_assert(gt, count <= GUC_MAX_SCHED_GROUPS);
+
+ pf_get_groups_exec_quantums(gt, vfid, exec_quantums, count);
+}
+
static const char *preempt_timeout_unit(u32 preempt_timeout)
{
return preempt_timeout ? "us" : "(infinity)";
@@ -1990,12 +2148,14 @@ static int pf_provision_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
{
struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
int err;
+ int i;
err = pf_push_vf_cfg_preempt_timeout(gt, vfid, &preempt_timeout);
if (unlikely(err))
return err;
- config->preempt_timeout = preempt_timeout;
+ for (i = 0; i < ARRAY_SIZE(config->preempt_timeout); i++)
+ config->preempt_timeout[i] = preempt_timeout;
return 0;
}
@@ -2004,7 +2164,7 @@ static u32 pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid)
{
struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
- return config->preempt_timeout;
+ return config->preempt_timeout[0];
}
/**
@@ -2110,6 +2270,89 @@ int xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(struct xe_gt *gt, u32
preempt_timeout_unit, n, err);
}
+static int pf_provision_groups_preempt_timeouts(struct xe_gt *gt, unsigned int vfid,
+ const u32 *preempt_timeouts, u32 count)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+ int err;
+ int i;
+
+ err = pf_push_vf_grp_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_KEY,
+ preempt_timeouts, count);
+ if (unlikely(err))
+ return err;
+
+ /*
+ * GuC silently clamps values exceeding the max and zeroes out the
+	 * timeout for groups not in the KLV payload
+ */
+ for (i = 0; i < ARRAY_SIZE(config->preempt_timeout); i++) {
+ if (i < count)
+ config->preempt_timeout[i] =
+ min_t(u32, preempt_timeouts[i],
+ GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_MAX_VALUE);
+ else
+ config->preempt_timeout[i] = 0;
+ }
+
+ return 0;
+}
+
+static void pf_get_groups_preempt_timeouts(struct xe_gt *gt, unsigned int vfid,
+ u32 *preempt_timeouts, u32 max_count)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+ u32 count = min_t(u32, max_count, ARRAY_SIZE(config->preempt_timeout));
+
+ memcpy(preempt_timeouts, config->preempt_timeout, sizeof(u32) * count);
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_groups_preempt_timeouts() - Configure PF/VF PTs for sched groups.
+ * @gt: the &xe_gt
+ * @vfid: the PF or VF identifier
+ * @preempt_timeouts: array of requested PTs in microseconds (0 is infinity)
+ * @count: number of entries in the array
+ *
+ * This function can only be called on PF.
+ * It will log the provisioned value or an error in case of failure.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_groups_preempt_timeouts(struct xe_gt *gt, unsigned int vfid,
+ u32 *preempt_timeouts, u32 count)
+{
+ int err;
+
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
+
+ err = pf_provision_groups_preempt_timeouts(gt, vfid, preempt_timeouts, count);
+
+ return pf_groups_cfg_set_u32_done(gt, vfid, preempt_timeouts, count,
+ pf_get_groups_preempt_timeouts,
+					  "preemption timeout",
+ preempt_timeout_unit, err);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_groups_preempt_timeouts() - Get PF/VF sched groups PTs
+ * @gt: the &xe_gt
+ * @vfid: the PF or VF identifier
+ * @preempt_timeouts: array in which to store the preemption timeouts values
+ * @count: maximum number of entries to store
+ *
+ * This function can only be called on PF.
+ */
+void xe_gt_sriov_pf_config_get_groups_preempt_timeouts(struct xe_gt *gt, unsigned int vfid,
+ u32 *preempt_timeouts, u32 count)
+{
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
+
+ xe_gt_assert(gt, count <= GUC_MAX_SCHED_GROUPS);
+
+ pf_get_groups_preempt_timeouts(gt, vfid, preempt_timeouts, count);
+}
+
static const char *sched_priority_unit(u32 priority)
{
return priority == GUC_SCHED_PRIORITY_LOW ? "(low)" :
@@ -2183,10 +2426,14 @@ u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid
static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *config)
{
+ int i;
+
lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
- config->exec_quantum = 0;
- config->preempt_timeout = 0;
+ for (i = 0; i < ARRAY_SIZE(config->exec_quantum); i++) {
+ config->exec_quantum[i] = 0;
+ config->preempt_timeout[i] = 0;
+ }
}
static int pf_provision_threshold(struct xe_gt *gt, unsigned int vfid,
@@ -2548,6 +2795,16 @@ static int pf_restore_vf_config_klv(struct xe_gt *gt, unsigned int vfid,
return -EBADMSG;
return pf_provision_exec_quantum(gt, vfid, value[0]);
+ case GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_KEY:
+ if (len > GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_MAX_LEN)
+ return -EBADMSG;
+ return pf_provision_groups_exec_quantums(gt, vfid, value, len);
+
+ case GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_KEY:
+ if (len > GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_MAX_LEN)
+ return -EBADMSG;
+ return pf_provision_groups_preempt_timeouts(gt, vfid, value, len);
+
case GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY:
if (len != GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN)
return -EBADMSG;
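
A hypothetical provisioning sequence using the new per-group setters
(values invented; per the GuC rules above, groups beyond the given count
are zeroed and oversized values are clamped):

    static int example_provision_vf1_groups(struct xe_gt *gt)
    {
    	u32 eqs[] = { 10, 5 };		/* ms, group0/group1; 0 = infinity */
    	u32 pts[] = { 2000, 1000 };	/* us, group0/group1 */
    	int err;

    	err = xe_gt_sriov_pf_config_set_groups_exec_quantums(gt, 1, eqs,
    							     ARRAY_SIZE(eqs));
    	if (err)
    		return err;

    	return xe_gt_sriov_pf_config_set_groups_preempt_timeouts(gt, 1, pts,
    								  ARRAY_SIZE(pts));
    }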
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
index 4975730423d7..3c6c8b6655af 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
@@ -46,6 +46,11 @@ int xe_gt_sriov_pf_config_set_exec_quantum_locked(struct xe_gt *gt, unsigned int
u32 exec_quantum);
int xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(struct xe_gt *gt, u32 exec_quantum);
+void xe_gt_sriov_pf_config_get_groups_exec_quantums(struct xe_gt *gt, unsigned int vfid,
+ u32 *exec_quantum, u32 max_count);
+int xe_gt_sriov_pf_config_set_groups_exec_quantums(struct xe_gt *gt, unsigned int vfid,
+ u32 *exec_quantum, u32 count);
+
u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
u32 preempt_timeout);
@@ -55,6 +60,11 @@ int xe_gt_sriov_pf_config_set_preempt_timeout_locked(struct xe_gt *gt, unsigned
u32 preempt_timeout);
int xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(struct xe_gt *gt, u32 preempt_timeout);
+void xe_gt_sriov_pf_config_get_groups_preempt_timeouts(struct xe_gt *gt, unsigned int vfid,
+ u32 *preempt_timeout, u32 max_count);
+int xe_gt_sriov_pf_config_set_groups_preempt_timeouts(struct xe_gt *gt, unsigned int vfid,
+ u32 *preempt_timeout, u32 count);
+
u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
index 686c7b3b6d7a..75a48d0fa859 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
@@ -6,6 +6,7 @@
#ifndef _XE_GT_SRIOV_PF_CONFIG_TYPES_H_
#define _XE_GT_SRIOV_PF_CONFIG_TYPES_H_
+#include "abi/guc_scheduler_abi.h"
#include "xe_ggtt_types.h"
#include "xe_guc_klv_thresholds_set_types.h"
@@ -30,9 +31,9 @@ struct xe_gt_sriov_config {
/** @begin_db: start index of GuC doorbell ID range. */
u16 begin_db;
/** @exec_quantum: execution-quantum in milliseconds. */
- u32 exec_quantum;
+ u32 exec_quantum[GUC_MAX_SCHED_GROUPS];
/** @preempt_timeout: preemption timeout in microseconds. */
- u32 preempt_timeout;
+ u32 preempt_timeout[GUC_MAX_SCHED_GROUPS];
/** @sched_priority: scheduling priority. */
u32 sched_priority;
/** @thresholds: GuC thresholds for adverse events notifications. */
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
index ece9eed5d7c5..47d288c53539 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
@@ -160,6 +160,299 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent)
* /sys/kernel/debug/dri/BDF/
* ├── sriov
* : ├── pf
+ * : ├── tile0
+ * : ├── gt0
+ * : ├── sched_groups_mode
+ * ├── sched_groups_exec_quantums_ms
+ *                  ├── sched_groups_preempt_timeouts_us
+ * ├── sched_groups
+ * : ├── group0
+ * :
+ * : └── groupN
+ * ├── vf1
+ * : ├── tile0
+ * : ├── gt0
+ * : ├── sched_groups_exec_quantums_ms
+ *                  ├── sched_groups_preempt_timeouts_us
+ * :
+ */
+
+static const char *sched_group_mode_to_string(enum xe_sriov_sched_group_modes mode)
+{
+ switch (mode) {
+ case XE_SRIOV_SCHED_GROUPS_DISABLED:
+ return "disabled";
+ case XE_SRIOV_SCHED_GROUPS_MEDIA_SLICES:
+ return "media_slices";
+ case XE_SRIOV_SCHED_GROUPS_MODES_COUNT:
+ /* dummy mode to make the compiler happy */
+ break;
+ }
+
+ return "unknown";
+}
+
+static int sched_groups_info(struct seq_file *m, void *data)
+{
+ struct drm_printer p = drm_seq_file_printer(m);
+ struct xe_gt *gt = extract_gt(m->private);
+ enum xe_sriov_sched_group_modes current_mode =
+ gt->sriov.pf.policy.guc.sched_groups.current_mode;
+ enum xe_sriov_sched_group_modes mode;
+
+ for (mode = XE_SRIOV_SCHED_GROUPS_DISABLED;
+ mode < XE_SRIOV_SCHED_GROUPS_MODES_COUNT;
+ mode++) {
+ if (!xe_sriov_gt_pf_policy_has_sched_group_mode(gt, mode))
+ continue;
+
+ drm_printf(&p, "%s%s%s%s",
+ mode == XE_SRIOV_SCHED_GROUPS_DISABLED ? "" : " ",
+ mode == current_mode ? "[" : "",
+ sched_group_mode_to_string(mode),
+ mode == current_mode ? "]" : "");
+ }
+
+ drm_puts(&p, "\n");
+
+ return 0;
+}
+
+static int sched_groups_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, sched_groups_info, inode->i_private);
+}
+
+static ssize_t sched_groups_write(struct file *file, const char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_gt *gt = extract_gt(file_inode(file)->i_private);
+ enum xe_sriov_sched_group_modes mode;
+ char name[32];
+ int ret;
+
+ if (*pos)
+ return -ESPIPE;
+
+ if (!size)
+ return -ENODATA;
+
+ if (size > sizeof(name) - 1)
+ return -EINVAL;
+
+ ret = simple_write_to_buffer(name, sizeof(name) - 1, pos, ubuf, size);
+ if (ret < 0)
+ return ret;
+ name[ret] = '\0';
+
+ for (mode = XE_SRIOV_SCHED_GROUPS_DISABLED;
+ mode < XE_SRIOV_SCHED_GROUPS_MODES_COUNT;
+ mode++)
+ if (sysfs_streq(name, sched_group_mode_to_string(mode)))
+ break;
+
+ if (mode == XE_SRIOV_SCHED_GROUPS_MODES_COUNT)
+ return -EINVAL;
+
+ guard(xe_pm_runtime)(gt_to_xe(gt));
+ ret = xe_gt_sriov_pf_policy_set_sched_groups_mode(gt, mode);
+
+ return ret < 0 ? ret : size;
+}
+
+static const struct file_operations sched_groups_fops = {
+ .owner = THIS_MODULE,
+ .open = sched_groups_open,
+ .read = seq_read,
+ .write = sched_groups_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int sched_groups_config_show(struct seq_file *m, void *data,
+ void (*get)(struct xe_gt *, unsigned int, u32 *, u32))
+{
+ struct drm_printer p = drm_seq_file_printer(m);
+ unsigned int vfid = extract_vfid(m->private);
+ struct xe_gt *gt = extract_gt(m->private);
+ u32 values[GUC_MAX_SCHED_GROUPS];
+ bool first = true;
+ u8 group;
+
+ get(gt, vfid, values, ARRAY_SIZE(values));
+
+ for (group = 0; group < ARRAY_SIZE(values); group++) {
+ drm_printf(&p, "%s%u", first ? "" : ",", values[group]);
+
+ first = false;
+ }
+
+ drm_puts(&p, "\n");
+
+ return 0;
+}
+
+static ssize_t sched_groups_config_write(struct file *file, const char __user *ubuf,
+ size_t size, loff_t *pos,
+ int (*set)(struct xe_gt *, unsigned int, u32 *, u32))
+{
+ struct dentry *parent = file_inode(file)->i_private;
+ unsigned int vfid = extract_vfid(parent);
+ struct xe_gt *gt = extract_gt(parent);
+ u32 values[GUC_MAX_SCHED_GROUPS];
+ int *input __free(kfree) = NULL;
+ u32 count;
+ int ret;
+ int i;
+
+ if (*pos)
+ return -ESPIPE;
+
+ if (!size)
+ return -ENODATA;
+
+ ret = parse_int_array_user(ubuf, min(size, GUC_MAX_SCHED_GROUPS * sizeof(u32)), &input);
+ if (ret)
+ return ret;
+
+ count = input[0];
+ if (count > GUC_MAX_SCHED_GROUPS)
+ return -E2BIG;
+
+ for (i = 0; i < count; i++) {
+ if (input[i + 1] < 0 || input[i + 1] > S32_MAX)
+ return -EINVAL;
+
+ values[i] = input[i + 1];
+ }
+
+ guard(xe_pm_runtime)(gt_to_xe(gt));
+ ret = set(gt, vfid, values, count);
+
+ return ret < 0 ? ret : size;
+}
+
+#define DEFINE_SRIOV_GT_GRP_CFG_DEBUGFS_ATTRIBUTE(CONFIG) \
+static int sched_groups_##CONFIG##_show(struct seq_file *m, void *data) \
+{ \
+ return sched_groups_config_show(m, data, \
+ xe_gt_sriov_pf_config_get_groups_##CONFIG); \
+} \
+ \
+static int sched_groups_##CONFIG##_open(struct inode *inode, struct file *file) \
+{ \
+ return single_open(file, sched_groups_##CONFIG##_show, \
+ inode->i_private); \
+} \
+ \
+static ssize_t sched_groups_##CONFIG##_write(struct file *file, \
+ const char __user *ubuf, \
+ size_t size, loff_t *pos) \
+{ \
+ return sched_groups_config_write(file, ubuf, size, pos, \
+ xe_gt_sriov_pf_config_set_groups_##CONFIG); \
+} \
+ \
+static const struct file_operations sched_groups_##CONFIG##_fops = { \
+ .owner = THIS_MODULE, \
+ .open = sched_groups_##CONFIG##_open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .write = sched_groups_##CONFIG##_write, \
+ .release = single_release, \
+}
+
+DEFINE_SRIOV_GT_GRP_CFG_DEBUGFS_ATTRIBUTE(exec_quantums);
+DEFINE_SRIOV_GT_GRP_CFG_DEBUGFS_ATTRIBUTE(preempt_timeouts);
+
+static ssize_t sched_group_engines_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct dentry *dent = file_dentry(file);
+ struct xe_gt *gt = extract_gt(dent->d_parent->d_parent);
+ struct xe_gt_sriov_scheduler_groups *info = &gt->sriov.pf.policy.guc.sched_groups;
+ struct guc_sched_group *groups = info->modes[info->current_mode].groups;
+ u32 num_groups = info->modes[info->current_mode].num_groups;
+ unsigned int group = (uintptr_t)extract_priv(dent);
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ char engines[128];
+
+ engines[0] = '\0';
+
+ if (group < num_groups) {
+ for_each_hw_engine(hwe, gt, id) {
+ u8 guc_class = xe_engine_class_to_guc_class(hwe->class);
+ u32 mask = groups[group].engines[guc_class];
+
+ if (mask & BIT(hwe->logical_instance)) {
+ strlcat(engines, hwe->name, sizeof(engines));
+ strlcat(engines, " ", sizeof(engines));
+ }
+ }
+ strlcat(engines, "\n", sizeof(engines));
+ }
+
+ return simple_read_from_buffer(buf, count, ppos, engines, strlen(engines));
+}
+
+static const struct file_operations sched_group_engines_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = sched_group_engines_read,
+ .llseek = default_llseek,
+};
+
+static void pf_add_sched_groups(struct xe_gt *gt, struct dentry *parent, unsigned int vfid)
+{
+ struct dentry *groups;
+ u8 group;
+
+ xe_gt_assert(gt, gt == extract_gt(parent));
+ xe_gt_assert(gt, vfid == extract_vfid(parent));
+
+ /*
+ * TODO: we currently call this function before we initialize scheduler
+ * groups, so at this point in time we don't know if there are any
+ * valid groups on the GT and we can't selectively register the debugfs
+ * only if there are any. Therefore, we always register the debugfs
+ * files if we're on a platform that has support for groups.
+ * We should rework the flow so that debugfs is registered after the
+ * policy init, so that we check if there are valid groups before
+ * adding the debugfs files.
+ * Similarly, instead of using GUC_MAX_SCHED_GROUPS we could use
+ * gt->sriov.pf.policy.guc.sched_groups.max_number_of_groups.
+ */
+ if (!xe_sriov_gt_pf_policy_has_sched_groups_support(gt))
+ return;
+
+ debugfs_create_file("sched_groups_exec_quantums_ms", 0644, parent, parent,
+ &sched_groups_exec_quantums_fops);
+ debugfs_create_file("sched_groups_preempt_timeouts_us", 0644, parent, parent,
+ &sched_groups_preempt_timeouts_fops);
+
+ if (vfid != PFID)
+ return;
+
+ debugfs_create_file("sched_groups_mode", 0644, parent, parent, &sched_groups_fops);
+
+ groups = debugfs_create_dir("sched_groups", parent);
+ if (IS_ERR(groups))
+ return;
+
+ for (group = 0; group < GUC_MAX_SCHED_GROUPS; group++) {
+ char name[10];
+
+ snprintf(name, sizeof(name), "group%u", group);
+		debugfs_create_file(name, 0444, groups, (void *)(uintptr_t)group,
+ &sched_group_engines_fops);
+ }
+}
+
+/*
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov
+ * : ├── pf
* │ ├── tile0
* │ : ├── gt0
* │ : ├── doorbells_spare
@@ -518,6 +811,7 @@ static void pf_populate_gt(struct xe_gt *gt, struct dentry *dent, unsigned int v
if (vfid) {
pf_add_config_attrs(gt, dent, vfid);
+ pf_add_sched_groups(gt, dent, vfid);
debugfs_create_file("control", 0600, dent, NULL, &control_ops);
@@ -531,6 +825,7 @@ static void pf_populate_gt(struct xe_gt *gt, struct dentry *dent, unsigned int v
} else {
pf_add_config_attrs(gt, dent, PFID);
pf_add_policy_attrs(gt, dent);
+ pf_add_sched_groups(gt, dent, PFID);
drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), dent, minor);
}
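
Worked example of the debugfs input format (illustrative): writing
"10,5" to sched_groups_exec_quantums_ms makes parse_int_array_user()
yield

    int input[] = { 2, 10, 5 };	/* input[0] is the element count */

so count == 2, group 0 gets a 10 ms quantum, group 1 gets 5 ms, and the
GuC zeroes the quanta of any remaining groups.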
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
index 4445f660e6d1..c28606ca6623 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
@@ -3,6 +3,8 @@
* Copyright © 2023-2024 Intel Corporation
*/
+#include <drm/drm_managed.h>
+
#include "abi/guc_actions_sriov_abi.h"
#include "xe_bo.h"
@@ -10,9 +12,11 @@
#include "xe_gt_sriov_pf_helpers.h"
#include "xe_gt_sriov_pf_policy.h"
#include "xe_gt_sriov_printk.h"
+#include "xe_guc.h"
#include "xe_guc_buf.h"
#include "xe_guc_ct.h"
#include "xe_guc_klv_helpers.h"
+#include "xe_guc_submit.h"
#include "xe_pm.h"
/*
@@ -94,6 +98,23 @@ static int pf_push_policy_u32(struct xe_gt *gt, u16 key, u32 value)
return pf_push_policy_klvs(gt, 1, klv, ARRAY_SIZE(klv));
}
+static int pf_push_policy_payload(struct xe_gt *gt, u16 key, void *payload, u32 num_dwords)
+{
+ CLASS(xe_guc_buf, buf)(&gt->uc.guc.buf, GUC_KLV_LEN_MIN + num_dwords);
+ u32 *klv;
+
+ if (!xe_guc_buf_is_valid(buf))
+ return -ENOBUFS;
+
+ klv = xe_guc_buf_cpu_ptr(buf);
+
+ klv[0] = PREP_GUC_KLV(key, num_dwords);
+ if (num_dwords)
+ memcpy(&klv[1], payload, num_dwords * sizeof(u32));
+
+ return pf_push_policy_buf_klvs(gt, 1, buf, GUC_KLV_LEN_MIN + num_dwords);
+}
+
static int pf_update_policy_bool(struct xe_gt *gt, u16 key, bool *policy, bool value)
{
int err;
@@ -351,11 +372,306 @@ u32 xe_gt_sriov_pf_policy_get_sample_period(struct xe_gt *gt)
return value;
}
+static void pf_sched_group_media_slices(struct xe_gt *gt, struct guc_sched_group **groups,
+ u32 *num_groups)
+{
+ u8 slice_to_group[MAX_MEDIA_SLICES];
+ u32 vecs_mask = VECS_INSTANCES(gt);
+ u32 gsc_mask = GSCCS_INSTANCES(gt);
+ u32 vcs_mask = VCS_INSTANCES(gt);
+ struct guc_sched_group *values;
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ int group = 0;
+ int slice;
+
+ xe_gt_assert(gt, xe_gt_is_media_type(gt));
+
+ /*
+ * Post-BMG the matching of video engines to slices changes, so for now
+ * we don't allow this mode on those platforms.
+ */
+ if (gt_to_xe(gt)->info.platform > XE_BATTLEMAGE)
+ return;
+
+ /*
+ * On BMG and older platforms a media slice has 2 VCS and a VECS. We
+ * bundle the GSC with the first slice.
+ */
+ for (slice = 0; slice < MAX_MEDIA_SLICES; slice++) {
+ if ((vcs_mask & 0x3) || (vecs_mask & 0x1) || (gsc_mask & 0x1))
+ slice_to_group[slice] = group++;
+
+ vcs_mask >>= 2;
+ vecs_mask >>= 1;
+ gsc_mask >>= 1;
+ }
+
+ xe_gt_assert(gt, !vcs_mask);
+ xe_gt_assert(gt, !vecs_mask);
+ xe_gt_assert(gt, !gsc_mask);
+
+ /* We need at least 2 slices to split them up */
+ if (group < 2)
+ return;
+
+ /*
+ * If we have more groups than the GuC can support then we don't want to
+ * expose this specific mode, because the GuC will return an error if we
+ * try to enable it.
+ */
+ if (group > gt->sriov.pf.policy.guc.sched_groups.max_groups) {
+		xe_gt_sriov_notice(gt, "media_slices mode has too many groups: %u vs %u\n",
+ group, gt->sriov.pf.policy.guc.sched_groups.max_groups);
+ return;
+ }
+
+ /* The GuC expects an array with a guc_sched_group entry for each group */
+ values = drmm_kcalloc(&gt_to_xe(gt)->drm, group, sizeof(struct guc_sched_group),
+ GFP_KERNEL);
+ if (!values)
+ return;
+
+ for_each_hw_engine(hwe, gt, id) {
+ u8 guc_class = xe_engine_class_to_guc_class(hwe->class);
+
+ switch (hwe->class) {
+ case XE_ENGINE_CLASS_VIDEO_DECODE:
+ slice = hwe->instance / 2;
+ break;
+ case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+ slice = hwe->instance;
+ break;
+ case XE_ENGINE_CLASS_OTHER:
+ slice = 0;
+ break;
+ default:
+ xe_gt_assert_msg(gt, false,
+ "unknown media gt class %u (%s) during EGS setup\n",
+ hwe->class, hwe->name);
+ slice = 0;
+ }
+
+ values[slice_to_group[slice]].engines[guc_class] |= BIT(hwe->logical_instance);
+ }
+
+ *groups = values;
+ *num_groups = group;
+}
+
+/**
+ * xe_sriov_gt_pf_policy_has_sched_groups_support() - Checks whether scheduler
+ * groups are supported.
+ * @gt: the &xe_gt
+ *
+ * This function can only be called on PF.
+ *
+ * Return: true if scheduler groups are supported, false otherwise.
+ */
+bool xe_sriov_gt_pf_policy_has_sched_groups_support(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ /*
+ * The GuC supports scheduler groups from v70.53.0, but a fix for it has
+ * been merged in v70.55.1, so we require the latter. The feature is
+	 * also only enabled on BMG and newer platforms.
+ */
+ return GUC_FIRMWARE_VER_AT_LEAST(&gt->uc.guc, 70, 55, 1) &&
+ gt_to_xe(gt)->info.platform >= XE_BATTLEMAGE;
+}
+
+static void pf_init_sched_groups(struct xe_gt *gt)
+{
+ enum xe_sriov_sched_group_modes m;
+
+ if (!xe_sriov_gt_pf_policy_has_sched_groups_support(gt))
+ return;
+
+ /*
+ * The GuC interface supports up to 8 groups. However, the GuC only
+ * fully allocates resources for a subset of groups, based on the number
+ * of engines and expected usage. The plan is for this to become
+ * queryable via H2G, but for now GuC FW for all devices supports a
+ * maximum of 2 groups so we can just hardcode that.
+ */
+ gt->sriov.pf.policy.guc.sched_groups.max_groups = 2;
+
+ for (m = XE_SRIOV_SCHED_GROUPS_DISABLED + 1; m < XE_SRIOV_SCHED_GROUPS_MODES_COUNT; m++) {
+ u32 *num_groups = &gt->sriov.pf.policy.guc.sched_groups.modes[m].num_groups;
+ struct guc_sched_group **groups =
+ &gt->sriov.pf.policy.guc.sched_groups.modes[m].groups;
+
+ switch (m) {
+ case XE_SRIOV_SCHED_GROUPS_MEDIA_SLICES:
+ /* this mode only has groups on the media GT */
+ if (xe_gt_is_media_type(gt))
+ pf_sched_group_media_slices(gt, groups, num_groups);
+ break;
+ case XE_SRIOV_SCHED_GROUPS_DISABLED:
+ case XE_SRIOV_SCHED_GROUPS_MODES_COUNT:
+ /*
+ * By defining m of type enum xe_sriov_sched_group_modes
+ * we can get the compiler to automatically flag
+ * missing cases if new enum entries are added. However,
+ * to keep the compiler happy we also need to add the
+ * cases that are excluded from the loop.
+ */
+ xe_gt_assert(gt, false);
+ break;
+ }
+
+ xe_gt_assert(gt, *num_groups < GUC_MAX_SCHED_GROUPS);
+
+ if (*num_groups)
+ gt->sriov.pf.policy.guc.sched_groups.supported_modes |= BIT(m);
+ }
+}
+
+/**
+ * xe_sriov_gt_pf_policy_has_multi_group_modes() - check whether the GT supports
+ * any scheduler modes that have multiple groups
+ * @gt: the &xe_gt to check
+ *
+ * This function can only be called on PF.
+ *
+ * Return: true if the GT supports modes with multiple groups, false otherwise.
+ */
+bool xe_sriov_gt_pf_policy_has_multi_group_modes(struct xe_gt *gt)
+{
+ return gt->sriov.pf.policy.guc.sched_groups.supported_modes;
+}
+
+/**
+ * xe_sriov_gt_pf_policy_has_sched_group_mode() - check whether the GT supports
+ * a specific scheduler group mode
+ * @gt: the &xe_gt to check
+ * @mode: the mode to check
+ *
+ * This function can only be called on PF.
+ *
+ * Return: true if the GT supports the specified mode, false otherwise.
+ */
+bool xe_sriov_gt_pf_policy_has_sched_group_mode(struct xe_gt *gt,
+ enum xe_sriov_sched_group_modes mode)
+{
+ if (mode == XE_SRIOV_SCHED_GROUPS_DISABLED)
+ return true;
+
+ return gt->sriov.pf.policy.guc.sched_groups.supported_modes & BIT(mode);
+}
+
+static int __pf_provision_sched_groups(struct xe_gt *gt, enum xe_sriov_sched_group_modes mode)
+{
+ struct guc_sched_group *groups = gt->sriov.pf.policy.guc.sched_groups.modes[mode].groups;
+ u32 num_groups = gt->sriov.pf.policy.guc.sched_groups.modes[mode].num_groups;
+
+ return pf_push_policy_payload(gt, GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG_KEY,
+ groups, num_groups * GUC_MAX_ENGINE_CLASSES);
+}
+
+static int pf_provision_sched_groups(struct xe_gt *gt, enum xe_sriov_sched_group_modes mode)
+{
+ int err;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ if (!xe_sriov_gt_pf_policy_has_sched_group_mode(gt, mode))
+ return -EINVAL;
+
+ /* already in the desired mode */
+ if (gt->sriov.pf.policy.guc.sched_groups.current_mode == mode)
+ return 0;
+
+ /*
+ * We don't allow changing this with VFs active since it is hard for
+ * VFs to check.
+ */
+ if (xe_sriov_pf_num_vfs(gt_to_xe(gt)))
+ return -EBUSY;
+
+ /*
+ * The GuC silently ignores the setting if any MLRC contexts are
+ * registered. We expect the admin to make sure that all apps that use
+ * MLRC are terminated before scheduler groups are enabled, so this
+ * check is just to make sure that the exec_queue destruction has been
+ * completed.
+ */
+ if (mode != XE_SRIOV_SCHED_GROUPS_DISABLED &&
+ xe_guc_has_registered_mlrc_queues(&gt->uc.guc)) {
+ xe_gt_sriov_notice(gt, "can't enable sched groups with active MLRC queues\n");
+ return -EPERM;
+ }
+
+ err = __pf_provision_sched_groups(gt, mode);
+ if (err)
+ return err;
+
+ gt->sriov.pf.policy.guc.sched_groups.current_mode = mode;
+
+ return 0;
+}
+
+static int pf_reprovision_sched_groups(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ /* We only have something to provision if we have possible groups */
+ if (!xe_sriov_gt_pf_policy_has_multi_group_modes(gt))
+ return 0;
+
+ return __pf_provision_sched_groups(gt, gt->sriov.pf.policy.guc.sched_groups.current_mode);
+}
+
+static void pf_sanitize_sched_groups(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ gt->sriov.pf.policy.guc.sched_groups.current_mode = XE_SRIOV_SCHED_GROUPS_DISABLED;
+}
+
+/**
+ * xe_gt_sriov_pf_policy_set_sched_groups_mode() - Control the 'sched_groups' policy.
+ * @gt: the &xe_gt where to apply the policy
+ * @mode: the sched_group mode to be activated
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_policy_set_sched_groups_mode(struct xe_gt *gt,
+ enum xe_sriov_sched_group_modes mode)
+{
+ if (!xe_sriov_gt_pf_policy_has_multi_group_modes(gt))
+ return -ENODEV;
+
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
+ return pf_provision_sched_groups(gt, mode);
+}
+
+/**
+ * xe_gt_sriov_pf_policy_sched_groups_enabled() - check whether the GT has
+ * multiple scheduler groups enabled
+ * @gt: the &xe_gt to check
+ *
+ * This function can only be called on PF.
+ *
+ * Return: true if the GT has multiple groups enabled, false otherwise.
+ */
+bool xe_gt_sriov_pf_policy_sched_groups_enabled(struct xe_gt *gt)
+{
+ return gt->sriov.pf.policy.guc.sched_groups.current_mode != XE_SRIOV_SCHED_GROUPS_DISABLED;
+}
+
static void pf_sanitize_guc_policies(struct xe_gt *gt)
{
pf_sanitize_sched_if_idle(gt);
pf_sanitize_reset_engine(gt);
pf_sanitize_sample_period(gt);
+ pf_sanitize_sched_groups(gt);
}
/**
@@ -394,6 +710,7 @@ int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset)
err |= pf_reprovision_sched_if_idle(gt);
err |= pf_reprovision_reset_engine(gt);
err |= pf_reprovision_sample_period(gt);
+ err |= pf_reprovision_sched_groups(gt);
mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
xe_pm_runtime_put(gt_to_xe(gt));
@@ -401,6 +718,18 @@ int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset)
return err ? -ENXIO : 0;
}
+/**
+ * xe_gt_sriov_pf_policy_init() - Initializes the SW state of the PF policies.
+ * @gt: the &xe_gt
+ *
+ * This function can only be called on PF. This function does not touch the HW,
+ * but must be called after the engines have been initialized.
+ */
+void xe_gt_sriov_pf_policy_init(struct xe_gt *gt)
+{
+ pf_init_sched_groups(gt);
+}
+
static void print_guc_policies(struct drm_printer *p, struct xe_gt_sriov_guc_policies *policy)
{
drm_printf(p, "%s:\t%s\n",
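
Worked example of the media_slices grouping (hypothetical BMG-like media
GT with VCS0-3, VECS0-1 and GSCCS0 present, assuming instance ==
logical_instance on a fully populated part):

    /*
     * slice 0 -> group 0: VCS0, VCS1, VECS0, GSCCS0 (GSC rides with slice 0)
     * slice 1 -> group 1: VCS2, VCS3, VECS1
     *
     * so pf_sched_group_media_slices() would produce:
     *	groups[0].engines[GUC_VIDEO_CLASS]        == BIT(0) | BIT(1)
     *	groups[0].engines[GUC_VIDEOENHANCE_CLASS] == BIT(0)
     *	groups[0].engines[GUC_GSC_OTHER_CLASS]    == BIT(0)
     *	groups[1].engines[GUC_VIDEO_CLASS]        == BIT(2) | BIT(3)
     *	groups[1].engines[GUC_VIDEOENHANCE_CLASS] == BIT(1)
     */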
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h
index 2a5dc33dc6d7..bd73aa58f9ca 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h
@@ -8,6 +8,8 @@
#include <linux/types.h>
+#include "xe_gt_sriov_pf_policy_types.h"
+
struct drm_printer;
struct xe_gt;
@@ -17,7 +19,15 @@ int xe_gt_sriov_pf_policy_set_reset_engine(struct xe_gt *gt, bool enable);
bool xe_gt_sriov_pf_policy_get_reset_engine(struct xe_gt *gt);
int xe_gt_sriov_pf_policy_set_sample_period(struct xe_gt *gt, u32 value);
u32 xe_gt_sriov_pf_policy_get_sample_period(struct xe_gt *gt);
+bool xe_sriov_gt_pf_policy_has_sched_groups_support(struct xe_gt *gt);
+bool xe_sriov_gt_pf_policy_has_multi_group_modes(struct xe_gt *gt);
+bool xe_sriov_gt_pf_policy_has_sched_group_mode(struct xe_gt *gt,
+ enum xe_sriov_sched_group_modes mode);
+int xe_gt_sriov_pf_policy_set_sched_groups_mode(struct xe_gt *gt,
+ enum xe_sriov_sched_group_modes mode);
+bool xe_gt_sriov_pf_policy_sched_groups_enabled(struct xe_gt *gt);
+void xe_gt_sriov_pf_policy_init(struct xe_gt *gt);
void xe_gt_sriov_pf_policy_sanitize(struct xe_gt *gt);
int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset);
int xe_gt_sriov_pf_policy_print(struct xe_gt *gt, struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h
index 4de532af135e..97d278190521 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h
@@ -8,16 +8,55 @@
#include <linux/types.h>
+#include "abi/guc_scheduler_abi.h"
+
+/**
+ * enum xe_sriov_sched_group_modes - list of possible scheduler group modes
+ * @XE_SRIOV_SCHED_GROUPS_DISABLED: no separate groups (i.e., all engines in group 0)
+ * @XE_SRIOV_SCHED_GROUPS_MEDIA_SLICES: separate groups for each media slice
+ * @XE_SRIOV_SCHED_GROUPS_MODES_COUNT: number of valid modes
+ */
+enum xe_sriov_sched_group_modes {
+ XE_SRIOV_SCHED_GROUPS_DISABLED = 0,
+ XE_SRIOV_SCHED_GROUPS_MEDIA_SLICES,
+ XE_SRIOV_SCHED_GROUPS_MODES_COUNT /* must be last */
+};
+
+/**
+ * struct xe_gt_sriov_scheduler_groups - Scheduler groups policy info
+ * @max_groups: max number of groups supported by the GuC for the platform
+ * @supported_modes: mask of supported modes
+ * @current_mode: active scheduler groups mode
+ * @modes: array of masks and their number for each mode
+ * @modes.groups: array of engine instance groups in given mode, with each group
+ * consisting of GUC_MAX_ENGINE_CLASSES engine instances masks. A
+ *               consisting of GUC_MAX_ENGINE_CLASSES engine instance masks.
+ *               A NULL value indicates that all the engines are in the same
+ * @modes.num_groups: number of groups in given mode, zero if all the engines
+ * are in the same group.
+ */
+struct xe_gt_sriov_scheduler_groups {
+ u8 max_groups;
+ u32 supported_modes;
+ enum xe_sriov_sched_group_modes current_mode;
+ struct {
+ struct guc_sched_group *groups;
+ u32 num_groups;
+ } modes[XE_SRIOV_SCHED_GROUPS_MODES_COUNT];
+};
+
/**
* struct xe_gt_sriov_guc_policies - GuC SR-IOV policies.
* @sched_if_idle: controls strict scheduling policy.
* @reset_engine: controls engines reset on VF switch policy.
* @sample_period: adverse events sampling period (in milliseconds).
+ * @sched_groups: available scheduling group configurations.
*/
struct xe_gt_sriov_guc_policies {
bool sched_if_idle;
bool reset_engine;
u32 sample_period;
+ struct xe_gt_sriov_scheduler_groups sched_groups;
};
/**
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index b8b391cfc8eb..d91c65dc3496 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -612,6 +612,52 @@ static void vf_cache_gmdid(struct xe_gt *gt)
gt->sriov.vf.runtime.gmdid = xe_gt_sriov_vf_gmdid(gt);
}
+static int vf_query_sched_groups(struct xe_gt *gt)
+{
+ struct xe_guc *guc = &gt->uc.guc;
+ struct xe_uc_fw_version guc_version;
+ u32 value = 0;
+ int err;
+
+ xe_gt_sriov_vf_guc_versions(gt, NULL, &guc_version);
+
+ if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 26, 0))
+ return 0;
+
+ err = guc_action_query_single_klv32(guc,
+ GUC_KLV_GLOBAL_CFG_GROUP_SCHEDULING_AVAILABLE_KEY,
+ &value);
+ if (unlikely(err)) {
+ xe_gt_sriov_err(gt, "Failed to obtain sched groups status (%pe)\n",
+ ERR_PTR(err));
+ return err;
+ }
+
+ /* valid values are 0 (disabled) and 1 (enabled) */
+ if (value > 1) {
+ xe_gt_sriov_err(gt, "Invalid sched groups status %u\n", value);
+ return -EPROTO;
+ }
+
+ xe_gt_sriov_dbg(gt, "sched groups %s\n", str_enabled_disabled(value));
+ return value;
+}
+
+static int vf_cache_sched_groups_status(struct xe_gt *gt)
+{
+ int ret;
+
+ xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+
+ ret = vf_query_sched_groups(gt);
+ if (ret < 0)
+ return ret;
+
+ gt->sriov.vf.runtime.uses_sched_groups = ret;
+
+ return 0;
+}
+
/**
* xe_gt_sriov_vf_query_config - Query SR-IOV config data over MMIO.
* @gt: the &xe_gt
@@ -641,6 +687,10 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt)
if (unlikely(err))
return err;
+ err = vf_cache_sched_groups_status(gt);
+ if (unlikely(err))
+ return err;
+
if (has_gmdid(xe))
vf_cache_gmdid(gt);
@@ -648,6 +698,23 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt)
}
/**
+ * xe_gt_sriov_vf_sched_groups_enabled() - Check if PF has enabled multiple
+ * scheduler groups
+ * @gt: the &xe_gt
+ *
+ * This function is for VF use only.
+ *
+ * Return: true if sched groups were enabled, false otherwise.
+ */
+bool xe_gt_sriov_vf_sched_groups_enabled(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+ xe_gt_assert(gt, gt->sriov.vf.guc_version.major);
+
+ return gt->sriov.vf.runtime.uses_sched_groups;
+}
+
+/**
* xe_gt_sriov_vf_guc_ids - VF GuC context IDs configuration.
* @gt: the &xe_gt
*
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
index af40276790fa..7d97189c2d3d 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
@@ -30,6 +30,7 @@ bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt);
u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt);
u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt);
u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt);
+bool xe_gt_sriov_vf_sched_groups_enabled(struct xe_gt *gt);
u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg);
void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
index 510c33116fbd..9a6b5672d569 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
@@ -27,6 +27,8 @@ struct xe_gt_sriov_vf_selfconfig {
struct xe_gt_sriov_vf_runtime {
	/** @gmdid: cached value of the GMDID register. */
u32 gmdid;
+ /** @uses_sched_groups: whether PF enabled sched groups or not. */
+ bool uses_sched_groups;
/** @regs_size: size of runtime register array. */
u32 regs_size;
/** @num_regs: number of runtime registers in the array. */
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 09ac092c3687..44360437beeb 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -175,7 +175,7 @@ static bool needs_wa_dual_queue(struct xe_gt *gt)
* the DUAL_QUEUE_WA on all newer platforms on GTs that have CCS engines
* to move management back to the GuC.
*/
- if (CCS_MASK(gt) && GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
+ if (CCS_INSTANCES(gt) && GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270)
return true;
return false;
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.h b/drivers/gpu/drm/xe/xe_guc_capture.h
index 20a078dc4b85..34d6fdc64f56 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.h
+++ b/drivers/gpu/drm/xe/xe_guc_capture.h
@@ -8,8 +8,8 @@
#include <linux/types.h>
#include "abi/guc_capture_abi.h"
+#include "abi/guc_scheduler_abi.h"
#include "xe_guc.h"
-#include "xe_guc_fwif.h"
struct xe_exec_queue;
struct xe_guc;
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index c3df9b3f1b4d..dfbf76037b04 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -206,7 +206,9 @@ static void g2h_fence_cancel(struct g2h_fence *g2h_fence)
{
g2h_fence->cancel = true;
g2h_fence->fail = true;
- g2h_fence->done = true;
+
+ /* WRITE_ONCE pairs with READ_ONCEs in guc_ct_send_recv. */
+ WRITE_ONCE(g2h_fence->done, true);
}
static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence)
@@ -527,7 +529,12 @@ static void guc_ct_change_state(struct xe_guc_ct *ct,
if (ct->g2h_outstanding)
xe_pm_runtime_put(ct_to_xe(ct));
ct->g2h_outstanding = 0;
- ct->state = state;
+
+ /*
+ * WRITE_ONCE pairs with READ_ONCEs in xe_guc_ct_initialized and
+ * xe_guc_ct_enabled.
+ */
+ WRITE_ONCE(ct->state, state);
xe_gt_dbg(gt, "GuC CT communication channel %s\n",
state == XE_GUC_CT_STATE_STOPPED ? "stopped" :
@@ -1294,10 +1301,13 @@ retry_same_fence:
return ret;
}
- ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
+	/* READ_ONCEs pair with WRITE_ONCEs in parse_g2h_response
+	 * and g2h_fence_cancel.
+	 */
+ ret = wait_event_timeout(ct->g2h_fence_wq, READ_ONCE(g2h_fence.done), HZ);
if (!ret) {
LNL_FLUSH_WORK(&ct->g2h_worker);
- if (g2h_fence.done) {
+ if (READ_ONCE(g2h_fence.done)) {
xe_gt_warn(gt, "G2H fence %u, action %04x, done\n",
g2h_fence.seqno, action[0]);
ret = 1;
@@ -1498,7 +1508,8 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
- g2h_fence->done = true;
+ /* WRITE_ONCE pairs with READ_ONCEs in guc_ct_send_recv. */
+ WRITE_ONCE(g2h_fence->done, true);
smp_mb();
wake_up_all(&ct->g2h_fence_wq);
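
The WRITE_ONCE/READ_ONCE annotations added above all follow the same flag-plus-waitqueue shape: the writer publishes the flag and wakes waiters, the reader polls the flag inside wait_event_timeout(). A minimal standalone sketch of the pattern, with hypothetical names (the waitqueue is assumed initialized with init_waitqueue_head()):

	#include <linux/wait.h>

	/* Hypothetical completion flag mirroring g2h_fence.done above. */
	struct demo_flag {
		bool done;
		wait_queue_head_t wq;
	};

	static void demo_complete(struct demo_flag *f)
	{
		WRITE_ONCE(f->done, true);	/* pairs with READ_ONCE in demo_wait() */
		smp_mb();			/* publish the flag before waking waiters */
		wake_up_all(&f->wq);
	}

	static long demo_wait(struct demo_flag *f)
	{
		/* READ_ONCE stops the compiler from caching the flag between checks. */
		return wait_event_timeout(f->wq, READ_ONCE(f->done), HZ);
	}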
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index 5599939f8fe1..767365a33dee 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -30,12 +30,14 @@ void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb)
static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct)
{
- return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED;
+ /* READ_ONCE pairs with WRITE_ONCE in guc_ct_change_state. */
+ return READ_ONCE(ct->state) != XE_GUC_CT_STATE_NOT_INITIALIZED;
}
static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct)
{
- return ct->state == XE_GUC_CT_STATE_ENABLED;
+ /* READ_ONCE pairs with WRITE_ONCE in guc_ct_change_state. */
+ return READ_ONCE(ct->state) == XE_GUC_CT_STATE_ENABLED;
}
static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index a04faec477ae..a33ea288b907 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -10,6 +10,7 @@
#include "abi/guc_capture_abi.h"
#include "abi/guc_klvs_abi.h"
+#include "abi/guc_scheduler_abi.h"
#include "xe_hw_engine_types.h"
#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4
@@ -19,59 +20,6 @@
#define G2H_LEN_DW_MULTI_QUEUE_CONTEXT 3
#define G2H_LEN_DW_PAGE_RECLAMATION 3
-#define GUC_ID_MAX 65535
-#define GUC_ID_UNKNOWN 0xffffffff
-
-#define GUC_CONTEXT_DISABLE 0
-#define GUC_CONTEXT_ENABLE 1
-
-#define GUC_CLIENT_PRIORITY_KMD_HIGH 0
-#define GUC_CLIENT_PRIORITY_HIGH 1
-#define GUC_CLIENT_PRIORITY_KMD_NORMAL 2
-#define GUC_CLIENT_PRIORITY_NORMAL 3
-#define GUC_CLIENT_PRIORITY_NUM 4
-
-#define GUC_RENDER_ENGINE 0
-#define GUC_VIDEO_ENGINE 1
-#define GUC_BLITTER_ENGINE 2
-#define GUC_VIDEOENHANCE_ENGINE 3
-#define GUC_VIDEO_ENGINE2 4
-#define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1)
-
-#define GUC_RENDER_CLASS 0
-#define GUC_VIDEO_CLASS 1
-#define GUC_VIDEOENHANCE_CLASS 2
-#define GUC_BLITTER_CLASS 3
-#define GUC_COMPUTE_CLASS 4
-#define GUC_GSC_OTHER_CLASS 5
-#define GUC_LAST_ENGINE_CLASS GUC_GSC_OTHER_CLASS
-#define GUC_MAX_ENGINE_CLASSES 16
-#define GUC_MAX_INSTANCES_PER_CLASS 32
-
-#define GUC_CONTEXT_NORMAL 0
-#define GUC_CONTEXT_COMPRESSION_SAVE 1
-#define GUC_CONTEXT_COMPRESSION_RESTORE 2
-#define GUC_CONTEXT_COUNT (GUC_CONTEXT_COMPRESSION_RESTORE + 1)
-
-/* Helper for context registration H2G */
-struct guc_ctxt_registration_info {
- u32 flags;
- u32 context_idx;
- u32 engine_class;
- u32 engine_submit_mask;
- u32 wq_desc_lo;
- u32 wq_desc_hi;
- u32 wq_base_lo;
- u32 wq_base_hi;
- u32 wq_size;
- u32 cgp_lo;
- u32 cgp_hi;
- u32 hwlrca_lo;
- u32 hwlrca_hi;
-};
-#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
-#define CONTEXT_REGISTRATION_FLAG_TYPE GENMASK(2, 1)
-
/* 32-bit KLV structure as used by policy updates and others */
struct guc_klv_generic_dw_t {
u32 kl;
diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c
index 146a6eda9e06..97600edda837 100644
--- a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c
+++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c
@@ -21,11 +21,16 @@
const char *xe_guc_klv_key_to_string(u16 key)
{
switch (key) {
+ /* GuC Global Config KLVs */
+ case GUC_KLV_GLOBAL_CFG_GROUP_SCHEDULING_AVAILABLE_KEY:
+ return "group_scheduling_available";
/* VGT POLICY keys */
case GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY:
return "sched_if_idle";
case GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY:
return "sample_period";
+ case GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG_KEY:
+ return "engine_group_config";
case GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY:
return "reset_engine";
/* VF CFG keys */
@@ -51,6 +56,10 @@ const char *xe_guc_klv_key_to_string(u16 key)
return "begin_ctx_id";
case GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY:
return "sched_priority";
+ case GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_KEY:
+ return "sched_groups_exec_quantum";
+ case GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_KEY:
+ return "sched_groups_preempt_timeout";
/* VF CFG threshold keys */
#define define_threshold_key_to_string_case(TAG, NAME, ...) \
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 0b590271c326..7a4218f76024 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -407,7 +407,7 @@ static int guc_init_global_schedule_policy(struct xe_guc *guc)
*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
- if (CCS_MASK(guc_to_gt(guc)))
+ if (CCS_INSTANCES(guc_to_gt(guc)))
emit = emit_render_compute_yield_klv(emit);
count = emit - data;
@@ -673,6 +673,23 @@ static void set_exec_queue_group_banned(struct xe_exec_queue *q)
mutex_unlock(&group->list_lock);
}
+/* Helper for context registration H2G */
+struct guc_ctxt_registration_info {
+ u32 flags;
+ u32 context_idx;
+ u32 engine_class;
+ u32 engine_submit_mask;
+ u32 wq_desc_lo;
+ u32 wq_desc_hi;
+ u32 wq_base_lo;
+ u32 wq_base_hi;
+ u32 wq_size;
+ u32 cgp_lo;
+ u32 cgp_hi;
+ u32 hwlrca_lo;
+ u32 hwlrca_hi;
+};
+
#define parallel_read(xe_, map_, field_) \
xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
field_)
@@ -3547,6 +3564,27 @@ void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
}
/**
+ * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues
+ * registered with the GuC
+ * @guc: GuC.
+ *
+ * Return: true if any MLRC queue is registered with the GuC, false otherwise.
+ */
+bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+
+ guard(mutex)(&guc->submission_state.lock);
+
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+ if (q->width > 1)
+ return true;
+
+ return false;
+}
+
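The guard(mutex)() used above comes from <linux/cleanup.h> and drops the lock automatically on every return path, which is what makes the early return inside xa_for_each() safe. A hedged standalone sketch of that idiom with hypothetical data:

	#include <linux/cleanup.h>
	#include <linux/mutex.h>
	#include <linux/xarray.h>

	/* Hypothetical reduction of the scan above: scoped lock, early return. */
	static bool demo_any_entry(struct mutex *lock, struct xarray *xa)
	{
		unsigned long index;
		void *entry;

		guard(mutex)(lock);	/* released automatically at any scope exit */

		xa_for_each(xa, index, entry)
			if (entry)
				return true;	/* lock dropped here as well */

		return false;
	}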
+/**
* xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all
* exec queues registered to given GuC.
* @guc: the &xe_guc struct instance
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index 4d89b2975fe9..b3839a90c142 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -52,6 +52,8 @@ xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapsh
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type);
+bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc);
+
int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch);
#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index f3b66b55acfb..00eef41a9e36 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -35,6 +35,7 @@
#include "xe_sa.h"
#include "xe_sched_job.h"
#include "xe_sriov_vf_ccs.h"
+#include "xe_svm.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
#include "xe_validation.h"
@@ -471,7 +472,8 @@ int xe_migrate_init(struct xe_migrate *m)
EXEC_QUEUE_FLAG_KERNEL |
EXEC_QUEUE_FLAG_PERMANENT |
EXEC_QUEUE_FLAG_HIGH_PRIORITY |
- EXEC_QUEUE_FLAG_MIGRATE, 0);
+ EXEC_QUEUE_FLAG_MIGRATE |
+ EXEC_QUEUE_FLAG_LOW_LATENCY, 0);
} else {
m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
XE_ENGINE_CLASS_COPY,
@@ -2048,7 +2050,8 @@ static void build_pt_update_batch_sram(struct xe_migrate *m,
u64 pte;
xe_tile_assert(m->tile, sram_addr[i].proto ==
- DRM_INTERCONNECT_SYSTEM);
+ DRM_INTERCONNECT_SYSTEM ||
+ sram_addr[i].proto == XE_INTERCONNECT_P2P);
xe_tile_assert(m->tile, addr);
xe_tile_assert(m->tile, PAGE_ALIGNED(addr));
@@ -2113,6 +2116,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
unsigned long sram_offset,
struct drm_pagemap_addr *sram_addr,
u64 vram_addr,
+ struct dma_fence *deps,
const enum xe_migrate_copy_dir dir)
{
struct xe_gt *gt = m->tile->primary_gt;
@@ -2201,6 +2205,14 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB);
+ if (deps && !dma_fence_is_signaled(deps)) {
+ dma_fence_get(deps);
+ err = drm_sched_job_add_dependency(&job->drm, deps);
+ if (err)
+ dma_fence_wait(deps, false);
+ err = 0;
+ }
+
mutex_lock(&m->job_mutex);
xe_sched_job_arm(job);
fence = dma_fence_get(&job->drm.s_fence->finished);
@@ -2226,6 +2238,8 @@ err:
* @npages: Number of pages to migrate.
* @src_addr: Array of DMA information (source of migrate)
* @dst_addr: Device physical address of VRAM (destination of migrate)
+ * @deps: struct dma_fence representing the dependencies that need
+ * to be signaled before migration.
*
* Copy from an array dma addresses to a VRAM device physical address
*
@@ -2235,10 +2249,11 @@ err:
struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
unsigned long npages,
struct drm_pagemap_addr *src_addr,
- u64 dst_addr)
+ u64 dst_addr,
+ struct dma_fence *deps)
{
return xe_migrate_vram(m, npages * PAGE_SIZE, 0, src_addr, dst_addr,
- XE_MIGRATE_COPY_TO_VRAM);
+ deps, XE_MIGRATE_COPY_TO_VRAM);
}
/**
@@ -2247,6 +2262,8 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
* @npages: Number of pages to migrate.
* @src_addr: Device physical address of VRAM (source of migrate)
* @dst_addr: Array of DMA information (destination of migrate)
+ * @deps: struct dma_fence representing the dependencies that need
+ * to be signaled before migration.
*
* Copy from a VRAM device physical address to an array dma addresses
*
@@ -2256,10 +2273,11 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
unsigned long npages,
u64 src_addr,
- struct drm_pagemap_addr *dst_addr)
+ struct drm_pagemap_addr *dst_addr,
+ struct dma_fence *deps)
{
return xe_migrate_vram(m, npages * PAGE_SIZE, 0, dst_addr, src_addr,
- XE_MIGRATE_COPY_TO_SRAM);
+ deps, XE_MIGRATE_COPY_TO_SRAM);
}
static void xe_migrate_dma_unmap(struct xe_device *xe,
@@ -2435,7 +2453,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
__fence = xe_migrate_vram(m, current_bytes,
(unsigned long)buf & ~PAGE_MASK,
&pagemap_addr[current_page],
- vram_addr, write ?
+ vram_addr, NULL, write ?
XE_MIGRATE_COPY_TO_VRAM :
XE_MIGRATE_COPY_TO_SRAM);
if (IS_ERR(__fence)) {
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 464c05dde1ba..1522afb37dcf 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -116,12 +116,14 @@ int xe_migrate_init(struct xe_migrate *m);
struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
unsigned long npages,
struct drm_pagemap_addr *src_addr,
- u64 dst_addr);
+ u64 dst_addr,
+ struct dma_fence *deps);
struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
unsigned long npages,
u64 src_addr,
- struct drm_pagemap_addr *dst_addr);
+ struct drm_pagemap_addr *dst_addr,
+ struct dma_fence *deps);
struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
struct xe_bo *src_bo,
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 18d4e6b5c319..a1fdca451ce0 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -27,6 +27,7 @@
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_module.h"
+#include "xe_pci_rebar.h"
#include "xe_pci_sriov.h"
#include "xe_pci_types.h"
#include "xe_pm.h"
@@ -370,6 +371,7 @@ static const struct xe_device_desc bmg_desc = {
.has_i2c = true,
.has_late_bind = true,
.has_pre_prod_wa = 1,
+ .has_soc_remapper_telem = true,
.has_sriov = true,
.has_mem_copy_instr = true,
.max_gt_per_tile = 2,
@@ -421,6 +423,8 @@ static const struct xe_device_desc cri_desc = {
.has_mbx_power_limits = true,
.has_mert = true,
.has_pre_prod_wa = 1,
+ .has_soc_remapper_sysctrl = true,
+ .has_soc_remapper_telem = true,
.has_sriov = true,
.max_gt_per_tile = 2,
.require_force_probe = true,
@@ -692,6 +696,8 @@ static int xe_info_init_early(struct xe_device *xe,
xe->info.has_page_reclaim_hw_assist = desc->has_page_reclaim_hw_assist;
xe->info.has_pre_prod_wa = desc->has_pre_prod_wa;
xe->info.has_pxp = desc->has_pxp;
+ xe->info.has_soc_remapper_sysctrl = desc->has_soc_remapper_sysctrl;
+ xe->info.has_soc_remapper_telem = desc->has_soc_remapper_telem;
xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) &&
desc->has_sriov;
xe->info.has_mem_copy_instr = desc->has_mem_copy_instr;
@@ -1016,7 +1022,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
return err;
- xe_vram_resize_bar(xe);
+ xe_pci_rebar_resize(xe);
err = xe_device_probe_early(xe);
/*
diff --git a/drivers/gpu/drm/xe/xe_pci_rebar.c b/drivers/gpu/drm/xe/xe_pci_rebar.c
new file mode 100644
index 000000000000..7e2c7079b6ff
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci_rebar.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/pci.h>
+#include <linux/types.h>
+
+#include "regs/xe_bars.h"
+#include "xe_device_types.h"
+#include "xe_module.h"
+#include "xe_pci_rebar.h"
+#include "xe_printk.h"
+
+static void resize_bar(struct xe_device *xe, int resno, resource_size_t size)
+{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ int bar_size = pci_rebar_bytes_to_size(size);
+ int ret;
+
+ ret = pci_resize_resource(pdev, resno, bar_size, 0);
+ if (ret) {
+ xe_info(xe, "Failed to resize BAR%d to %dMiB (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n",
+ resno, 1 << bar_size, ERR_PTR(ret));
+ return;
+ }
+
+ xe_info(xe, "BAR%d resized to %dMiB\n", resno, 1 << bar_size);
+}
+
+/**
+ * xe_pci_rebar_resize - Resize the LMEMBAR
+ * @xe: xe device instance
+ *
+ * If the force_vram_bar_size module param is set to a positive value, attempt
+ * to resize the BAR to the requested size in MiB; if it is zero, resize to
+ * the maximum possible size; if it is negative, leave the BAR unchanged.
+ */
+void xe_pci_rebar_resize(struct xe_device *xe)
+{
+ int force_vram_bar_size = xe_modparam.force_vram_bar_size;
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ struct pci_bus *root = pdev->bus;
+ resource_size_t current_size;
+ resource_size_t rebar_size;
+ struct resource *root_res;
+ int max_size, i;
+ u32 pci_cmd;
+
+ /* gather some relevant info */
+ current_size = pci_resource_len(pdev, LMEM_BAR);
+
+ if (force_vram_bar_size < 0)
+ return;
+
+ /* set to a specific size? */
+ if (force_vram_bar_size) {
+ rebar_size = pci_rebar_bytes_to_size(force_vram_bar_size *
+ (resource_size_t)SZ_1M);
+
+ if (!pci_rebar_size_supported(pdev, LMEM_BAR, rebar_size)) {
+ xe_info(xe, "Requested size %lluMiB is not supported by rebar sizes: 0x%llx. Leaving default: %lluMiB\n",
+ (u64)pci_rebar_size_to_bytes(rebar_size) >> ilog2(SZ_1M),
+ pci_rebar_get_possible_sizes(pdev, LMEM_BAR),
+ (u64)current_size >> ilog2(SZ_1M));
+ return;
+ }
+
+ rebar_size = pci_rebar_size_to_bytes(rebar_size);
+ if (rebar_size == current_size)
+ return;
+ } else {
+ max_size = pci_rebar_get_max_size(pdev, LMEM_BAR);
+ if (max_size < 0)
+ return;
+ rebar_size = pci_rebar_size_to_bytes(max_size);
+
+ /* only resize if larger than current */
+ if (rebar_size <= current_size)
+ return;
+ }
+
+	xe_info(xe, "Attempting to resize BAR from %lluMiB -> %lluMiB\n",
+ (u64)current_size >> ilog2(SZ_1M), (u64)rebar_size >> ilog2(SZ_1M));
+
+ while (root->parent)
+ root = root->parent;
+
+ pci_bus_for_each_resource(root, root_res, i) {
+ if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
+ (u64)root_res->start > 0x100000000ul)
+ break;
+ }
+
+ if (!root_res) {
+ xe_info(xe, "Can't resize VRAM BAR - platform support is missing. Consider enabling 'Resizable BAR' support in your BIOS\n");
+ return;
+ }
+
+ pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
+ pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
+
+ resize_bar(xe, LMEM_BAR, rebar_size);
+
+ pci_assign_unassigned_bus_resources(pdev->bus);
+ pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
+}
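
The size encoding used throughout the function is log2-of-MiB, as the `1 << bar_size` in the log messages implies; a hypothetical round-trip check under that assumption, using the same helpers the function calls (holds only for power-of-two sizes of at least 1 MiB):

	/* Hypothetical demo: the rebar encoding round-trips for power-of-two sizes. */
	static bool demo_rebar_roundtrip(resource_size_t bytes)
	{
		int bar_size = pci_rebar_bytes_to_size(bytes);	/* e.g. SZ_256M -> 8 */

		return pci_rebar_size_to_bytes(bar_size) == bytes;
	}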
diff --git a/drivers/gpu/drm/xe/xe_pci_rebar.h b/drivers/gpu/drm/xe/xe_pci_rebar.h
new file mode 100644
index 000000000000..8677921ac363
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pci_rebar.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_PCI_REBAR_H_
+#define _XE_PCI_REBAR_H_
+
+struct xe_device;
+
+void xe_pci_rebar_resize(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index 3bb51d155951..5f20f56571d1 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -53,6 +53,8 @@ struct xe_device_desc {
u8 has_pre_prod_wa:1;
u8 has_page_reclaim_hw_assist:1;
u8 has_pxp:1;
+ u8 has_soc_remapper_sysctrl:1;
+ u8 has_soc_remapper_telem:1;
u8 has_sriov:1;
u8 needs_scratch:1;
u8 skip_guc_pc:1;
diff --git a/drivers/gpu/drm/xe/xe_soc_remapper.c b/drivers/gpu/drm/xe/xe_soc_remapper.c
new file mode 100644
index 000000000000..1c391d719196
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_soc_remapper.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "regs/xe_soc_remapper_regs.h"
+#include "xe_mmio.h"
+#include "xe_soc_remapper.h"
+
+static void xe_soc_remapper_set_region(struct xe_device *xe, struct xe_reg reg,
+ u32 mask, u32 val)
+{
+ guard(spinlock_irqsave)(&xe->soc_remapper.lock);
+ xe_mmio_rmw32(xe_root_tile_mmio(xe), reg, mask, val);
+}
+
+static void xe_soc_remapper_set_telem_region(struct xe_device *xe, u32 index)
+{
+ xe_soc_remapper_set_region(xe, SG_REMAP_INDEX1, SG_REMAP_TELEM_MASK,
+ REG_FIELD_PREP(SG_REMAP_TELEM_MASK, index));
+}
+
+static void xe_soc_remapper_set_sysctrl_region(struct xe_device *xe, u32 index)
+{
+ xe_soc_remapper_set_region(xe, SG_REMAP_INDEX1, SG_REMAP_SYSCTRL_MASK,
+ REG_FIELD_PREP(SG_REMAP_SYSCTRL_MASK, index));
+}
+
+/**
+ * xe_soc_remapper_init() - Initialize SoC remapper
+ * @xe: Pointer to xe device.
+ *
+ * Initialize SoC remapper.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int xe_soc_remapper_init(struct xe_device *xe)
+{
+ bool has_soc_remapper = xe->info.has_soc_remapper_telem ||
+ xe->info.has_soc_remapper_sysctrl;
+
+ if (has_soc_remapper)
+ spin_lock_init(&xe->soc_remapper.lock);
+
+ if (xe->info.has_soc_remapper_telem)
+ xe->soc_remapper.set_telem_region = xe_soc_remapper_set_telem_region;
+
+ if (xe->info.has_soc_remapper_sysctrl)
+ xe->soc_remapper.set_sysctrl_region = xe_soc_remapper_set_sysctrl_region;
+
+ return 0;
+}
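
Consumers are expected to go through the function pointers installed here, so platforms without the hardware simply leave them NULL. A hedged call-site sketch with a made-up region index:

	/* Hypothetical call site: select telemetry remap region 1 when available. */
	static void demo_select_telem_region(struct xe_device *xe)
	{
		if (xe->soc_remapper.set_telem_region)
			xe->soc_remapper.set_telem_region(xe, 1);
	}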
diff --git a/drivers/gpu/drm/xe/xe_soc_remapper.h b/drivers/gpu/drm/xe/xe_soc_remapper.h
new file mode 100644
index 000000000000..1060ad0f5abc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_soc_remapper.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SOC_REMAPPER_H_
+#define _XE_SOC_REMAPPER_H_
+
+#include "xe_device_types.h"
+
+int xe_soc_remapper_init(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 93550c7c84ac..fa2ee2c08f31 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -3,7 +3,12 @@
* Copyright © 2024 Intel Corporation
*/
+#include <linux/pci-p2pdma.h>
+
#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_pagemap.h>
+#include <drm/drm_pagemap_util.h>
#include "xe_bo.h"
#include "xe_exec_queue_types.h"
@@ -19,6 +24,38 @@
#include "xe_vm_types.h"
#include "xe_vram_types.h"
+/* Identifies subclasses of struct drm_pagemap_peer */
+#define XE_PEER_PAGEMAP ((void *)0ul)
+#define XE_PEER_VM ((void *)1ul)
+
+/**
+ * DOC: drm_pagemap reference-counting in xe
+ *
+ * In addition to the drm_pagemap internal reference counting by its zone
+ * device data, the xe driver holds the following long-term references:
+ *
+ * - struct xe_pagemap:
+ *   The xe_pagemap struct derives from struct drm_pagemap and uses its
+ *   reference count.
+ * - SVM-enabled VMs:
+ *   SVM-enabled VMs look up and keep a reference to all xe_pagemaps on
+ *   the same device.
+ * - VMAs:
+ *   VMAs keep a reference to the drm_pagemap indicated by a gpu_madvise()
+ *   call.
+ *
+ * In addition, any drm_pagemap or xe_pagemap pointer whose lifetime cannot
+ * be guaranteed by a vma reference under the vm lock should hold its own
+ * reference. That includes the range->pages.dpagemap pointer.
+ */
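
A hedged sketch of that last rule, assuming a drm_pagemap_get() counterpart to the drm_pagemap_put() used elsewhere in this file; the helper name is hypothetical:

	/* Hypothetical: pin a range's pagemap before its vma reference can vanish. */
	static struct drm_pagemap *demo_pin_range_pagemap(struct xe_svm_range *range)
	{
		struct drm_pagemap *dpagemap = range->base.pages.dpagemap;

		if (dpagemap)
			drm_pagemap_get(dpagemap);	/* caller pairs with drm_pagemap_put() */

		return dpagemap;
	}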
+
+static int xe_svm_get_pagemaps(struct xe_vm *vm);
+
+void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem)
+{
+ return force_smem ? NULL : vm->svm.peer.owner;
+}
+
static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
/*
@@ -276,10 +313,14 @@ static int __xe_svm_garbage_collector(struct xe_vm *vm,
static void xe_vma_set_default_attributes(struct xe_vma *vma)
{
- vma->attr.preferred_loc.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE;
- vma->attr.preferred_loc.migration_policy = DRM_XE_MIGRATE_ALL_PAGES;
- vma->attr.pat_index = vma->attr.default_pat_index;
- vma->attr.atomic_access = DRM_XE_ATOMIC_UNDEFINED;
+ struct xe_vma_mem_attr default_attr = {
+ .preferred_loc.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
+ .preferred_loc.migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
+ .pat_index = vma->attr.default_pat_index,
+ .atomic_access = DRM_XE_ATOMIC_UNDEFINED,
+ };
+
+ xe_vma_mem_attr_copy(&vma->attr, &default_attr);
}
static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 start, u64 end)
@@ -390,27 +431,47 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w)
#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
-static struct xe_vram_region *page_to_vr(struct page *page)
+static struct xe_vram_region *xe_pagemap_to_vr(struct xe_pagemap *xpagemap)
{
- return container_of(page_pgmap(page), struct xe_vram_region, pagemap);
+ return xpagemap->vr;
}
-static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr,
- struct page *page)
+static struct xe_pagemap *xe_page_to_pagemap(struct page *page)
{
- u64 dpa;
+ return container_of(page_pgmap(page), struct xe_pagemap, pagemap);
+}
+
+static struct xe_vram_region *xe_page_to_vr(struct page *page)
+{
+ return xe_pagemap_to_vr(xe_page_to_pagemap(page));
+}
+
+static u64 xe_page_to_dpa(struct page *page)
+{
+ struct xe_pagemap *xpagemap = xe_page_to_pagemap(page);
+ struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
+ u64 hpa_base = xpagemap->hpa_base;
u64 pfn = page_to_pfn(page);
u64 offset;
+ u64 dpa;
xe_assert(vr->xe, is_device_private_page(page));
- xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= vr->hpa_base);
+ xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= hpa_base);
- offset = (pfn << PAGE_SHIFT) - vr->hpa_base;
+ offset = (pfn << PAGE_SHIFT) - hpa_base;
dpa = vr->dpa_base + offset;
return dpa;
}
+static u64 xe_page_to_pcie(struct page *page)
+{
+ struct xe_pagemap *xpagemap = xe_page_to_pagemap(page);
+ struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
+
+ return xe_page_to_dpa(page) - vr->dpa_base + vr->io_start;
+}
+
enum xe_svm_copy_dir {
XE_SVM_COPY_TO_VRAM,
XE_SVM_COPY_TO_SRAM,
@@ -472,7 +533,8 @@ static void xe_svm_copy_us_stats_incr(struct xe_gt *gt,
static int xe_svm_copy(struct page **pages,
struct drm_pagemap_addr *pagemap_addr,
- unsigned long npages, const enum xe_svm_copy_dir dir)
+ unsigned long npages, const enum xe_svm_copy_dir dir,
+ struct dma_fence *pre_migrate_fence)
{
struct xe_vram_region *vr = NULL;
struct xe_gt *gt = NULL;
@@ -509,11 +571,11 @@ static int xe_svm_copy(struct page **pages,
continue;
if (!vr && spage) {
- vr = page_to_vr(spage);
+ vr = xe_page_to_vr(spage);
gt = xe_migrate_exec_queue(vr->migrate)->gt;
xe = vr->xe;
}
- XE_WARN_ON(spage && page_to_vr(spage) != vr);
+ XE_WARN_ON(spage && xe_page_to_vr(spage) != vr);
/*
* CPU page and device page valid, capture physical address on
@@ -521,7 +583,7 @@ static int xe_svm_copy(struct page **pages,
* device pages.
*/
if (pagemap_addr[i].addr && spage) {
- __vram_addr = xe_vram_region_page_to_dpa(vr, spage);
+ __vram_addr = xe_page_to_dpa(spage);
if (vram_addr == XE_VRAM_ADDR_INVALID) {
vram_addr = __vram_addr;
pos = i;
@@ -561,7 +623,8 @@ static int xe_svm_copy(struct page **pages,
__fence = xe_migrate_from_vram(vr->migrate,
i - pos + incr,
vram_addr,
- &pagemap_addr[pos]);
+ &pagemap_addr[pos],
+ pre_migrate_fence);
} else {
vm_dbg(&xe->drm,
"COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
@@ -570,13 +633,14 @@ static int xe_svm_copy(struct page **pages,
__fence = xe_migrate_to_vram(vr->migrate,
i - pos + incr,
&pagemap_addr[pos],
- vram_addr);
+ vram_addr,
+ pre_migrate_fence);
}
if (IS_ERR(__fence)) {
err = PTR_ERR(__fence);
goto err_out;
}
-
+ pre_migrate_fence = NULL;
dma_fence_put(fence);
fence = __fence;
}
@@ -599,20 +663,22 @@ static int xe_svm_copy(struct page **pages,
vram_addr, (u64)pagemap_addr[pos].addr, 1);
__fence = xe_migrate_from_vram(vr->migrate, 1,
vram_addr,
- &pagemap_addr[pos]);
+ &pagemap_addr[pos],
+ pre_migrate_fence);
} else {
vm_dbg(&xe->drm,
"COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
(u64)pagemap_addr[pos].addr, vram_addr, 1);
__fence = xe_migrate_to_vram(vr->migrate, 1,
&pagemap_addr[pos],
- vram_addr);
+ vram_addr,
+ pre_migrate_fence);
}
if (IS_ERR(__fence)) {
err = PTR_ERR(__fence);
goto err_out;
}
-
+ pre_migrate_fence = NULL;
dma_fence_put(fence);
fence = __fence;
}
@@ -625,6 +691,8 @@ err_out:
dma_fence_wait(fence, false);
dma_fence_put(fence);
}
+ if (pre_migrate_fence)
+ dma_fence_wait(pre_migrate_fence, false);
/*
	 * XXX: We can't derive the GT here (or anywhere in this function, but
@@ -641,16 +709,20 @@ err_out:
static int xe_svm_copy_to_devmem(struct page **pages,
struct drm_pagemap_addr *pagemap_addr,
- unsigned long npages)
+ unsigned long npages,
+ struct dma_fence *pre_migrate_fence)
{
- return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_VRAM);
+ return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_VRAM,
+ pre_migrate_fence);
}
static int xe_svm_copy_to_ram(struct page **pages,
struct drm_pagemap_addr *pagemap_addr,
- unsigned long npages)
+ unsigned long npages,
+ struct dma_fence *pre_migrate_fence)
{
- return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_SRAM);
+ return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_SRAM,
+ pre_migrate_fence);
}
static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation)
@@ -663,13 +735,16 @@ static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation)
struct xe_bo *bo = to_xe_bo(devmem_allocation);
struct xe_device *xe = xe_bo_device(bo);
+ dma_fence_put(devmem_allocation->pre_migrate_fence);
xe_bo_put_async(bo);
xe_pm_runtime_put(xe);
}
-static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset)
+static u64 block_offset_to_pfn(struct drm_pagemap *dpagemap, u64 offset)
{
- return PHYS_PFN(offset + vr->hpa_base);
+ struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
+
+ return PHYS_PFN(offset + xpagemap->hpa_base);
}
static struct drm_buddy *vram_to_buddy(struct xe_vram_region *vram)
@@ -689,7 +764,8 @@ static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocati
list_for_each_entry(block, blocks, link) {
struct xe_vram_region *vr = block->private;
struct drm_buddy *buddy = vram_to_buddy(vr);
- u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block));
+ u64 block_pfn = block_offset_to_pfn(devmem_allocation->dpagemap,
+ drm_buddy_block_offset(block));
int i;
for (i = 0; i < drm_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
@@ -706,6 +782,11 @@ static const struct drm_pagemap_devmem_ops dpagemap_devmem_ops = {
.copy_to_ram = xe_svm_copy_to_ram,
};
+#else
+static int xe_svm_get_pagemaps(struct xe_vm *vm)
+{
+ return 0;
+}
#endif
static const struct drm_gpusvm_ops gpusvm_ops = {
@@ -720,6 +801,48 @@ static const unsigned long fault_chunk_sizes[] = {
SZ_4K,
};
+static void xe_pagemap_put(struct xe_pagemap *xpagemap)
+{
+ drm_pagemap_put(&xpagemap->dpagemap);
+}
+
+static void xe_svm_put_pagemaps(struct xe_vm *vm)
+{
+ struct xe_device *xe = vm->xe;
+ struct xe_tile *tile;
+ int id;
+
+ for_each_tile(tile, xe, id) {
+ struct xe_pagemap *xpagemap = vm->svm.pagemaps[id];
+
+ if (xpagemap)
+ xe_pagemap_put(xpagemap);
+ vm->svm.pagemaps[id] = NULL;
+ }
+}
+
+static struct device *xe_peer_to_dev(struct drm_pagemap_peer *peer)
+{
+ if (peer->private == XE_PEER_PAGEMAP)
+ return container_of(peer, struct xe_pagemap, peer)->dpagemap.drm->dev;
+
+ return container_of(peer, struct xe_vm, svm.peer)->xe->drm.dev;
+}
+
+static bool xe_has_interconnect(struct drm_pagemap_peer *peer1,
+ struct drm_pagemap_peer *peer2)
+{
+ struct device *dev1 = xe_peer_to_dev(peer1);
+ struct device *dev2 = xe_peer_to_dev(peer2);
+
+ if (dev1 == dev2)
+ return true;
+
+ return pci_p2pdma_distance(to_pci_dev(dev1), dev2, true) >= 0;
+}
+
+static DRM_PAGEMAP_OWNER_LIST_DEFINE(xe_owner_list);
+
/**
* xe_svm_init() - SVM initialize
* @vm: The VM.
@@ -738,12 +861,30 @@ int xe_svm_init(struct xe_vm *vm)
INIT_WORK(&vm->svm.garbage_collector.work,
xe_svm_garbage_collector_work_func);
+ vm->svm.peer.private = XE_PEER_VM;
+ err = drm_pagemap_acquire_owner(&vm->svm.peer, &xe_owner_list,
+ xe_has_interconnect);
+ if (err)
+ return err;
+
+ err = xe_svm_get_pagemaps(vm);
+ if (err) {
+ drm_pagemap_release_owner(&vm->svm.peer);
+ return err;
+ }
+
err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
current->mm, 0, vm->size,
xe_modparam.svm_notifier_size * SZ_1M,
&gpusvm_ops, fault_chunk_sizes,
ARRAY_SIZE(fault_chunk_sizes));
drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);
+
+ if (err) {
+ xe_svm_put_pagemaps(vm);
+ drm_pagemap_release_owner(&vm->svm.peer);
+ return err;
+ }
} else {
err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)",
&vm->xe->drm, NULL, 0, 0, 0, NULL,
@@ -763,6 +904,8 @@ void xe_svm_close(struct xe_vm *vm)
{
xe_assert(vm->xe, xe_vm_is_closed(vm));
flush_work(&vm->svm.garbage_collector.work);
+ xe_svm_put_pagemaps(vm);
+ drm_pagemap_release_owner(&vm->svm.peer);
}
/**
@@ -778,13 +921,34 @@ void xe_svm_fini(struct xe_vm *vm)
drm_gpusvm_fini(&vm->svm.gpusvm);
}
+static bool xe_svm_range_has_pagemap_locked(const struct xe_svm_range *range,
+ const struct drm_pagemap *dpagemap)
+{
+ return range->base.pages.dpagemap == dpagemap;
+}
+
+static bool xe_svm_range_has_pagemap(struct xe_svm_range *range,
+ const struct drm_pagemap *dpagemap)
+{
+ struct xe_vm *vm = range_to_vm(&range->base);
+ bool ret;
+
+ xe_svm_notifier_lock(vm);
+ ret = xe_svm_range_has_pagemap_locked(range, dpagemap);
+ xe_svm_notifier_unlock(vm);
+
+ return ret;
+}
+
static bool xe_svm_range_is_valid(struct xe_svm_range *range,
struct xe_tile *tile,
- bool devmem_only)
+ bool devmem_only,
+ const struct drm_pagemap *dpagemap)
+
{
return (xe_vm_has_valid_gpu_mapping(tile, range->tile_present,
range->tile_invalidated) &&
- (!devmem_only || xe_svm_range_in_vram(range)));
+ (!devmem_only || xe_svm_range_has_pagemap(range, dpagemap)));
}
/** xe_svm_range_migrate_to_smem() - Move range pages from VRAM to SMEM
@@ -805,7 +969,8 @@ void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range)
* @vm: xe_vm pointer
* @range: Pointer to the SVM range structure
* @tile_mask: Mask representing the tiles to be checked
- * @devmem_preferred : if true range needs to be in devmem
+ * @dpagemap: if !%NULL, the range is expected to be present
+ * in device memory identified by this parameter.
*
* The xe_svm_range_validate() function checks if a range is
* valid and located in the desired memory region.
@@ -814,14 +979,15 @@ void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range)
*/
bool xe_svm_range_validate(struct xe_vm *vm,
struct xe_svm_range *range,
- u8 tile_mask, bool devmem_preferred)
+ u8 tile_mask, const struct drm_pagemap *dpagemap)
{
bool ret;
xe_svm_notifier_lock(vm);
- ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask &&
- (devmem_preferred == range->base.pages.flags.has_devmem_pages);
+ ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask;
+ if (dpagemap)
+ ret = ret && xe_svm_range_has_pagemap_locked(range, dpagemap);
xe_svm_notifier_unlock(vm);
@@ -856,7 +1022,13 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
struct mm_struct *mm,
unsigned long timeslice_ms)
{
- struct xe_vram_region *vr = container_of(dpagemap, typeof(*vr), dpagemap);
+ struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
+ struct drm_pagemap_migrate_details mdetails = {
+ .timeslice_ms = timeslice_ms,
+ .source_peer_migrates = 1,
+ };
+ struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
+ struct dma_fence *pre_migrate_fence = NULL;
struct xe_device *xe = vr->xe;
struct device *dev = xe->drm.dev;
struct drm_buddy_block *block;
@@ -883,8 +1055,20 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
break;
}
+ /* Ensure that any clearing or async eviction will complete before migration. */
+ if (!dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL)) {
+ err = dma_resv_get_singleton(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+ &pre_migrate_fence);
+ if (err)
+ dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+ else if (pre_migrate_fence)
+ dma_fence_enable_sw_signaling(pre_migrate_fence);
+ }
+
drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm,
- &dpagemap_devmem_ops, dpagemap, end - start);
+ &dpagemap_devmem_ops, dpagemap, end - start,
+ pre_migrate_fence);
blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
list_for_each_entry(block, blocks, link)
@@ -894,11 +1078,9 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
/* Ensure the device has a pm ref while there are device pages active. */
xe_pm_runtime_get_noresume(xe);
+ /* Consumes the devmem allocation ref. */
err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
- start, end, timeslice_ms,
- xe_svm_devm_owner(xe));
- if (err)
- xe_svm_devmem_release(&bo->devmem_allocation);
+ start, end, &mdetails);
xe_bo_unlock(bo);
xe_bo_put(bo);
}
@@ -921,23 +1103,23 @@ static bool supports_4K_migration(struct xe_device *xe)
* xe_svm_range_needs_migrate_to_vram() - SVM range needs migrate to VRAM or not
* @range: SVM range for which migration needs to be decided
* @vma: vma which has range
- * @preferred_region_is_vram: preferred region for range is vram
+ * @dpagemap: The preferred struct drm_pagemap to migrate to.
*
* Return: True for range needing migration and migration is supported else false
*/
bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma,
- bool preferred_region_is_vram)
+ const struct drm_pagemap *dpagemap)
{
struct xe_vm *vm = range_to_vm(&range->base);
u64 range_size = xe_svm_range_size(range);
- if (!range->base.pages.flags.migrate_devmem || !preferred_region_is_vram)
+ if (!range->base.pages.flags.migrate_devmem || !dpagemap)
return false;
xe_assert(vm->xe, IS_DGFX(vm->xe));
- if (xe_svm_range_in_vram(range)) {
- drm_info(&vm->xe->drm, "Range is already in VRAM\n");
+ if (xe_svm_range_has_pagemap(range, dpagemap)) {
+ drm_dbg(&vm->xe->drm, "Range is already in VRAM\n");
return false;
}
@@ -1011,7 +1193,6 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
.devmem_only = need_vram && devmem_possible,
.timeslice_ms = need_vram && devmem_possible ?
vm->xe->atomic_svm_timeslice_ms : 0,
- .device_private_page_owner = xe_svm_devm_owner(vm->xe),
};
struct xe_validation_ctx vctx;
struct drm_exec exec;
@@ -1034,9 +1215,9 @@ retry:
if (err)
return err;
- dpagemap = xe_vma_resolve_pagemap(vma, tile);
- if (!dpagemap && !ctx.devmem_only)
- ctx.device_private_page_owner = NULL;
+ dpagemap = ctx.devmem_only ? xe_tile_local_pagemap(tile) :
+ xe_vma_resolve_pagemap(vma, tile);
+ ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap);
range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx);
if (IS_ERR(range))
@@ -1049,7 +1230,7 @@ retry:
goto out;
}
- if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) {
+ if (xe_svm_range_is_valid(range, tile, ctx.devmem_only, dpagemap)) {
xe_svm_range_valid_fault_count_stats_incr(gt, range);
range_debug(range, "PAGE FAULT - VALID");
goto out;
@@ -1058,16 +1239,11 @@ retry:
range_debug(range, "PAGE FAULT");
if (--migrate_try_count >= 0 &&
- xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) {
+ xe_svm_range_needs_migrate_to_vram(range, vma, dpagemap)) {
ktime_t migrate_start = xe_gt_stats_ktime_get();
- /* TODO : For multi-device dpagemap will be used to find the
- * remote tile and remote device. Will need to modify
- * xe_svm_alloc_vram to use dpagemap for future multi-device
- * support.
- */
xe_svm_range_migrate_count_stats_incr(gt, range);
- err = xe_svm_alloc_vram(tile, range, &ctx);
+ err = xe_svm_alloc_vram(range, &ctx, dpagemap);
xe_svm_range_migrate_us_stats_incr(gt, range, migrate_start);
ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
if (err) {
@@ -1118,6 +1294,10 @@ get_pages:
if (err) {
range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
goto out;
+ } else if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) {
+		drm_dbg(&vm->xe->drm, "After page collect, data location is %sin \"%s\".\n",
+ xe_svm_range_has_pagemap(range, dpagemap) ? "" : "NOT ",
+ dpagemap ? dpagemap->drm->unique : "System.");
}
xe_svm_range_get_pages_us_stats_incr(gt, range, get_pages_start);
@@ -1365,11 +1545,6 @@ u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end)
#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
-static struct drm_pagemap *tile_local_pagemap(struct xe_tile *tile)
-{
- return &tile->mem.vram->dpagemap;
-}
-
/**
* xe_vma_resolve_pagemap - Resolve the appropriate DRM pagemap for a VMA
* @vma: Pointer to the xe_vma structure containing memory attributes
@@ -1389,40 +1564,87 @@ static struct drm_pagemap *tile_local_pagemap(struct xe_tile *tile)
*/
struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile)
{
- s32 fd = (s32)vma->attr.preferred_loc.devmem_fd;
+ struct drm_pagemap *dpagemap = vma->attr.preferred_loc.dpagemap;
+ s32 fd;
+
+ if (dpagemap)
+ return dpagemap;
+
+ fd = (s32)vma->attr.preferred_loc.devmem_fd;
if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM)
return NULL;
if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE)
- return IS_DGFX(tile_to_xe(tile)) ? tile_local_pagemap(tile) : NULL;
+ return IS_DGFX(tile_to_xe(tile)) ? xe_tile_local_pagemap(tile) : NULL;
- /* TODO: Support multi-device with drm_pagemap_from_fd(fd) */
return NULL;
}
/**
* xe_svm_alloc_vram()- Allocate device memory pages for range,
* migrating existing data.
- * @tile: tile to allocate vram from
* @range: SVM range
* @ctx: DRM GPU SVM context
+ * @dpagemap: The struct drm_pagemap representing the memory to allocate.
*
* Return: 0 on success, error code on failure.
*/
-int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
- const struct drm_gpusvm_ctx *ctx)
+int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx,
+ struct drm_pagemap *dpagemap)
{
- struct drm_pagemap *dpagemap;
+ static DECLARE_RWSEM(driver_migrate_lock);
+ struct xe_vm *vm = range_to_vm(&range->base);
+ enum drm_gpusvm_scan_result migration_state;
+ struct xe_device *xe = vm->xe;
+ int err, retries = 1;
+ bool write_locked = false;
- xe_assert(tile_to_xe(tile), range->base.pages.flags.migrate_devmem);
+	xe_assert(xe, range->base.pages.flags.migrate_devmem);
range_debug(range, "ALLOCATE VRAM");
- dpagemap = tile_local_pagemap(tile);
- return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range),
- xe_svm_range_end(range),
- range->base.gpusvm->mm,
- ctx->timeslice_ms);
+ migration_state = drm_gpusvm_scan_mm(&range->base,
+ xe_svm_private_page_owner(vm, false),
+ dpagemap->pagemap);
+
+ if (migration_state == DRM_GPUSVM_SCAN_EQUAL) {
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
+ drm_dbg(dpagemap->drm, "Already migrated!\n");
+ return 0;
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
+ drm_dbg(&xe->drm, "Request migration to device memory on \"%s\".\n",
+ dpagemap->drm->unique);
+
+ err = down_read_interruptible(&driver_migrate_lock);
+ if (err)
+ return err;
+ do {
+ err = drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range),
+ xe_svm_range_end(range),
+ range->base.gpusvm->mm,
+ ctx->timeslice_ms);
+
+ if (err == -EBUSY && retries) {
+ if (!write_locked) {
+ int lock_err;
+
+ up_read(&driver_migrate_lock);
+ lock_err = down_write_killable(&driver_migrate_lock);
+ if (lock_err)
+ return lock_err;
+ write_locked = true;
+ }
+ drm_gpusvm_range_evict(range->base.gpusvm, &range->base);
+ }
+ } while (err == -EBUSY && retries--);
+ if (write_locked)
+ up_write(&driver_migrate_lock);
+ else
+ up_read(&driver_migrate_lock);
+
+ return err;
}
static struct drm_pagemap_addr
@@ -1432,92 +1654,363 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
unsigned int order,
enum dma_data_direction dir)
{
- struct device *pgmap_dev = dpagemap->dev;
+ struct device *pgmap_dev = dpagemap->drm->dev;
enum drm_interconnect_protocol prot;
dma_addr_t addr;
if (pgmap_dev == dev) {
- addr = xe_vram_region_page_to_dpa(page_to_vr(page), page);
+ addr = xe_page_to_dpa(page);
prot = XE_INTERCONNECT_VRAM;
} else {
- addr = DMA_MAPPING_ERROR;
- prot = 0;
+ addr = dma_map_resource(dev,
+ xe_page_to_pcie(page),
+ PAGE_SIZE << order, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ prot = XE_INTERCONNECT_P2P;
}
return drm_pagemap_addr_encode(addr, prot, order, dir);
}
+static void xe_drm_pagemap_device_unmap(struct drm_pagemap *dpagemap,
+ struct device *dev,
+ struct drm_pagemap_addr addr)
+{
+ if (addr.proto != XE_INTERCONNECT_P2P)
+ return;
+
+ dma_unmap_resource(dev, addr.addr, PAGE_SIZE << addr.order,
+ addr.dir, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
+static void xe_pagemap_destroy_work(struct work_struct *work)
+{
+ struct xe_pagemap *xpagemap = container_of(work, typeof(*xpagemap), destroy_work);
+ struct dev_pagemap *pagemap = &xpagemap->pagemap;
+ struct drm_device *drm = xpagemap->dpagemap.drm;
+ int idx;
+
+ /*
+	 * Only unmap / release if the devm_ release actions haven't run yet.
+	 * Otherwise the devm_ callbacks have already released the resources,
+	 * or will do so shortly.
+ */
+ if (drm_dev_enter(drm, &idx)) {
+ devm_memunmap_pages(drm->dev, pagemap);
+ devm_release_mem_region(drm->dev, pagemap->range.start,
+ pagemap->range.end - pagemap->range.start + 1);
+ drm_dev_exit(idx);
+ }
+
+ drm_pagemap_release_owner(&xpagemap->peer);
+ kfree(xpagemap);
+}
+
+static void xe_pagemap_destroy(struct drm_pagemap *dpagemap, bool from_atomic_or_reclaim)
+{
+ struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
+ struct xe_device *xe = to_xe_device(dpagemap->drm);
+
+ if (from_atomic_or_reclaim)
+ queue_work(xe->destroy_wq, &xpagemap->destroy_work);
+ else
+ xe_pagemap_destroy_work(&xpagemap->destroy_work);
+}
+
static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
.device_map = xe_drm_pagemap_device_map,
+ .device_unmap = xe_drm_pagemap_device_unmap,
.populate_mm = xe_drm_pagemap_populate_mm,
+ .destroy = xe_pagemap_destroy,
};
/**
- * xe_devm_add: Remap and provide memmap backing for device memory
- * @tile: tile that the memory region belongs to
- * @vr: vram memory region to remap
+ * xe_pagemap_create() - Create a struct xe_pagemap object
+ * @xe: The xe device.
+ * @vr: Back-pointer to the struct xe_vram_region.
*
- * This remap device memory to host physical address space and create
- * struct page to back device memory
+ * Allocate and initialize a struct xe_pagemap. On successful
+ * return, drm_pagemap_put() on the embedded struct drm_pagemap
+ * should be used to unreference.
*
- * Return: 0 on success standard error code otherwise
+ * Return: Pointer to a struct xe_pagemap if successful. Error pointer
+ * on failure.
*/
-int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
+static struct xe_pagemap *xe_pagemap_create(struct xe_device *xe, struct xe_vram_region *vr)
{
- struct xe_device *xe = tile_to_xe(tile);
- struct device *dev = &to_pci_dev(xe->drm.dev)->dev;
+ struct device *dev = xe->drm.dev;
+ struct xe_pagemap *xpagemap;
+ struct dev_pagemap *pagemap;
+ struct drm_pagemap *dpagemap;
struct resource *res;
void *addr;
- int ret;
+ int err;
+
+ xpagemap = kzalloc(sizeof(*xpagemap), GFP_KERNEL);
+ if (!xpagemap)
+ return ERR_PTR(-ENOMEM);
+
+ pagemap = &xpagemap->pagemap;
+ dpagemap = &xpagemap->dpagemap;
+ INIT_WORK(&xpagemap->destroy_work, xe_pagemap_destroy_work);
+ xpagemap->vr = vr;
+ xpagemap->peer.private = XE_PEER_PAGEMAP;
+
+ err = drm_pagemap_init(dpagemap, pagemap, &xe->drm, &xe_drm_pagemap_ops);
+ if (err)
+ goto out_no_dpagemap;
res = devm_request_free_mem_region(dev, &iomem_resource,
vr->usable_size);
if (IS_ERR(res)) {
- ret = PTR_ERR(res);
- return ret;
+ err = PTR_ERR(res);
+ goto out_err;
+ }
+
+ err = drm_pagemap_acquire_owner(&xpagemap->peer, &xe_owner_list,
+ xe_has_interconnect);
+ if (err)
+ goto out_no_owner;
+
+ pagemap->type = MEMORY_DEVICE_PRIVATE;
+ pagemap->range.start = res->start;
+ pagemap->range.end = res->end;
+ pagemap->nr_range = 1;
+ pagemap->owner = xpagemap->peer.owner;
+ pagemap->ops = drm_pagemap_pagemap_ops_get();
+ addr = devm_memremap_pages(dev, pagemap);
+ if (IS_ERR(addr)) {
+ err = PTR_ERR(addr);
+ goto out_no_pages;
+ }
+ xpagemap->hpa_base = res->start;
+ return xpagemap;
+
+out_no_pages:
+ drm_pagemap_release_owner(&xpagemap->peer);
+out_no_owner:
+ devm_release_mem_region(dev, res->start, res->end - res->start + 1);
+out_err:
+ drm_pagemap_put(dpagemap);
+ return ERR_PTR(err);
+
+out_no_dpagemap:
+ kfree(xpagemap);
+ return ERR_PTR(err);
+}
+
+/**
+ * xe_pagemap_find_or_create() - Find or create a struct xe_pagemap
+ * @xe: The xe device.
+ * @cache: The struct xe_pagemap_cache.
+ * @vr: The VRAM region.
+ *
+ * Check if there is an already used xe_pagemap for this tile and, if so,
+ * return it.
+ * If not, check if there is a cached xe_pagemap for this tile and, if so,
+ * cancel its destruction, re-initialize it and return it.
+ * Finally, if there is no cached or already used pagemap, create one and
+ * register it in the tile's pagemap cache.
+ *
+ * Note that this function is typically called from within an IOCTL, and waits
+ * are therefore carried out interruptibly if possible.
+ *
+ * Return: A pointer to a struct xe_pagemap if successful, error pointer on
+ * failure.
+ */
+static struct xe_pagemap *
+xe_pagemap_find_or_create(struct xe_device *xe, struct drm_pagemap_cache *cache,
+ struct xe_vram_region *vr)
+{
+ struct drm_pagemap *dpagemap;
+ struct xe_pagemap *xpagemap;
+ int err;
+
+ err = drm_pagemap_cache_lock_lookup(cache);
+ if (err)
+ return ERR_PTR(err);
+
+ dpagemap = drm_pagemap_get_from_cache(cache);
+ if (IS_ERR(dpagemap)) {
+ xpagemap = ERR_CAST(dpagemap);
+ } else if (!dpagemap) {
+ xpagemap = xe_pagemap_create(xe, vr);
+ if (IS_ERR(xpagemap))
+ goto out_unlock;
+ drm_pagemap_cache_set_pagemap(cache, &xpagemap->dpagemap);
+ } else {
+ xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
}
- vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
- vr->pagemap.range.start = res->start;
- vr->pagemap.range.end = res->end;
- vr->pagemap.nr_range = 1;
- vr->pagemap.ops = drm_pagemap_pagemap_ops_get();
- vr->pagemap.owner = xe_svm_devm_owner(xe);
- addr = devm_memremap_pages(dev, &vr->pagemap);
+out_unlock:
+ drm_pagemap_cache_unlock_lookup(cache);
+ return xpagemap;
+}
+
+static int xe_svm_get_pagemaps(struct xe_vm *vm)
+{
+ struct xe_device *xe = vm->xe;
+	struct xe_pagemap *xpagemap = NULL;
+ struct xe_tile *tile;
+ int id;
+
+ for_each_tile(tile, xe, id) {
+ struct xe_vram_region *vr;
- vr->dpagemap.dev = dev;
- vr->dpagemap.ops = &xe_drm_pagemap_ops;
+ if (!((BIT(id) << 1) & xe->info.mem_region_mask))
+ continue;
- if (IS_ERR(addr)) {
- devm_release_mem_region(dev, res->start, resource_size(res));
- ret = PTR_ERR(addr);
- drm_err(&xe->drm, "Failed to remap tile %d memory, errno %pe\n",
- tile->id, ERR_PTR(ret));
- return ret;
+ vr = xe_tile_to_vr(tile);
+ xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr);
+ if (IS_ERR(xpagemap))
+ break;
+ vm->svm.pagemaps[id] = xpagemap;
+ }
+
+ if (IS_ERR(xpagemap)) {
+ xe_svm_put_pagemaps(vm);
+ return PTR_ERR(xpagemap);
}
- vr->hpa_base = res->start;
- drm_dbg(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n",
- tile->id, vr->io_start, vr->io_start + vr->usable_size, res);
return 0;
}
+
+/**
+ * xe_pagemap_shrinker_create() - Create a drm_pagemap shrinker
+ * @xe: The xe device
+ *
+ * Create a drm_pagemap shrinker and register with the xe device.
+ *
+ * Return: %0 on success, negative error code on failure.
+ */
+int xe_pagemap_shrinker_create(struct xe_device *xe)
+{
+ xe->usm.dpagemap_shrinker = drm_pagemap_shrinker_create_devm(&xe->drm);
+ return PTR_ERR_OR_ZERO(xe->usm.dpagemap_shrinker);
+}
+
+/**
+ * xe_pagemap_cache_create() - Create a drm_pagemap cache
+ * @tile: The tile to register the cache with
+ *
+ * Create a drm_pagemap cache and register with the tile.
+ *
+ * Return: %0 on success, negative error code on failure.
+ */
+int xe_pagemap_cache_create(struct xe_tile *tile)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+
+ if (IS_DGFX(xe)) {
+ struct drm_pagemap_cache *cache =
+ drm_pagemap_cache_create_devm(xe->usm.dpagemap_shrinker);
+
+ if (IS_ERR(cache))
+ return PTR_ERR(cache);
+
+ tile->mem.vram->dpagemap_cache = cache;
+ }
+
+ return 0;
+}
+
+static struct drm_pagemap *xe_devmem_open(struct xe_device *xe, u32 region_instance)
+{
+ u32 tile_id = region_instance - 1;
+ struct xe_pagemap *xpagemap;
+ struct xe_vram_region *vr;
+
+ if (tile_id >= xe->info.tile_count)
+ return ERR_PTR(-ENOENT);
+
+ if (!((BIT(tile_id) << 1) & xe->info.mem_region_mask))
+ return ERR_PTR(-ENOENT);
+
+ vr = xe_tile_to_vr(&xe->tiles[tile_id]);
+
+ /* Returns a reference-counted embedded struct drm_pagemap */
+ xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr);
+ if (IS_ERR(xpagemap))
+ return ERR_CAST(xpagemap);
+
+ return &xpagemap->dpagemap;
+}
+
+/**
+ * xe_drm_pagemap_from_fd() - Return a drm_pagemap pointer from a
+ * (file_descriptor, region_instance) pair.
+ * @fd: An fd opened against an xe device.
+ * @region_instance: The region instance representing the device memory
+ * on the opened xe device.
+ *
+ * Opens a struct drm_pagemap pointer on the
+ * indicated device and region_instance.
+ *
+ * Return: A reference-counted struct drm_pagemap pointer on success,
+ * negative error pointer on failure.
+ */
+struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
+{
+ struct drm_pagemap *dpagemap;
+ struct file *file;
+ struct drm_file *fpriv;
+ struct drm_device *drm;
+ int idx;
+
+ if (fd <= 0)
+ return ERR_PTR(-EINVAL);
+
+ file = fget(fd);
+ if (!file)
+ return ERR_PTR(-ENOENT);
+
+ if (!xe_is_xe_file(file)) {
+ dpagemap = ERR_PTR(-ENOENT);
+ goto out;
+ }
+
+ fpriv = file->private_data;
+ drm = fpriv->minor->dev;
+ if (!drm_dev_enter(drm, &idx)) {
+ dpagemap = ERR_PTR(-ENODEV);
+ goto out;
+ }
+
+ dpagemap = xe_devmem_open(to_xe_device(drm), region_instance);
+ drm_dev_exit(idx);
+out:
+ fput(file);
+ return dpagemap;
+}
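
A hypothetical caller, e.g. from a madvise-style ioctl path, resolving a userspace-supplied (fd, region_instance) pair and keeping the returned reference:

	/* Hypothetical: resolve and hold a preferred-location pagemap reference. */
	static int demo_resolve_devmem(int fd, u32 region_instance,
				       struct drm_pagemap **out)
	{
		struct drm_pagemap *dpagemap = xe_drm_pagemap_from_fd(fd, region_instance);

		if (IS_ERR(dpagemap))
			return PTR_ERR(dpagemap);

		*out = dpagemap;	/* drop later with drm_pagemap_put() */
		return 0;
	}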
+
#else
-int xe_svm_alloc_vram(struct xe_tile *tile,
- struct xe_svm_range *range,
- const struct drm_gpusvm_ctx *ctx)
+
+int xe_pagemap_shrinker_create(struct xe_device *xe)
{
- return -EOPNOTSUPP;
+ return 0;
}
-int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
+int xe_pagemap_cache_create(struct xe_tile *tile)
{
return 0;
}
+int xe_svm_alloc_vram(struct xe_svm_range *range,
+ const struct drm_gpusvm_ctx *ctx,
+ struct drm_pagemap *dpagemap)
+{
+ return -EOPNOTSUPP;
+}
+
struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile)
{
return NULL;
}
+
+struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
+{
+ return ERR_PTR(-ENOENT);
+}
+
#endif
/**
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index fa757dd07954..b7b8eeacf196 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -6,29 +6,22 @@
#ifndef _XE_SVM_H_
#define _XE_SVM_H_
-struct xe_device;
-
-/**
- * xe_svm_devm_owner() - Return the owner of device private memory
- * @xe: The xe device.
- *
- * Return: The owner of this device's device private memory to use in
- * hmm_range_fault()-
- */
-static inline void *xe_svm_devm_owner(struct xe_device *xe)
-{
- return xe;
-}
-
#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
#include <drm/drm_pagemap.h>
#include <drm/drm_gpusvm.h>
+#include <drm/drm_pagemap_util.h>
#define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER
+#define XE_INTERCONNECT_P2P (XE_INTERCONNECT_VRAM + 1)
+
+struct drm_device;
+struct drm_file;
struct xe_bo;
struct xe_gt;
+struct xe_device;
+struct xe_vram_region;
struct xe_tile;
struct xe_vm;
struct xe_vma;
@@ -56,6 +49,24 @@ struct xe_svm_range {
};
/**
+ * struct xe_pagemap - Manages xe device_private memory for SVM.
+ * @pagemap: The struct dev_pagemap providing the struct pages.
+ * @dpagemap: The drm_pagemap managing allocation and migration.
+ * @destroy_work: Handles asynchronous destruction and caching.
+ * @peer: Used for pagemap owner computation.
+ * @hpa_base: The host physical address base for the managed memory.
+ * @vr: Backpointer to the struct xe_vram_region.
+ */
+struct xe_pagemap {
+ struct dev_pagemap pagemap;
+ struct drm_pagemap dpagemap;
+ struct work_struct destroy_work;
+ struct drm_pagemap_peer peer;
+ resource_size_t hpa_base;
+ struct xe_vram_region *vr;
+};
+
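Since both the struct dev_pagemap and the struct drm_pagemap are embedded, conversions back to the wrapper are plain container_of() lookups. Assumed helpers for illustration (the hunks shown here do not include them):

	static inline struct xe_pagemap *to_xe_pagemap(struct drm_pagemap *dpagemap)
	{
		return container_of(dpagemap, struct xe_pagemap, dpagemap);
	}

	static inline struct xe_pagemap *
	xe_pagemap_from_pagemap(struct dev_pagemap *pagemap)
	{
		return container_of(pagemap, struct xe_pagemap, pagemap);
	}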
+/**
* xe_svm_range_pages_valid() - SVM range pages valid
* @range: SVM range
*
@@ -84,8 +95,8 @@ int xe_svm_bo_evict(struct xe_bo *bo);
void xe_svm_range_debug(struct xe_svm_range *range, const char *operation);
-int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
- const struct drm_gpusvm_ctx *ctx);
+int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx,
+ struct drm_pagemap *dpagemap);
struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr,
struct xe_vma *vma, struct drm_gpusvm_ctx *ctx);
@@ -94,13 +105,13 @@ int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range,
struct drm_gpusvm_ctx *ctx);
bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma,
- bool preferred_region_is_vram);
+ const struct drm_pagemap *dpagemap);
void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range);
bool xe_svm_range_validate(struct xe_vm *vm,
struct xe_svm_range *range,
- u8 tile_mask, bool devmem_preferred);
+ u8 tile_mask, const struct drm_pagemap *dpagemap);
u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vma);
@@ -110,6 +121,8 @@ u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end);
struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile);
+void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem);
+
/**
* xe_svm_range_has_dma_mapping() - SVM range has DMA mapping
* @range: SVM range
@@ -171,6 +184,12 @@ static inline unsigned long xe_svm_range_size(struct xe_svm_range *range)
void xe_svm_flush(struct xe_vm *vm);
+int xe_pagemap_shrinker_create(struct xe_device *xe);
+
+int xe_pagemap_cache_create(struct xe_tile *tile);
+
+struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance);
+
#else
#include <linux/interval_tree.h>
#include "xe_vm.h"
@@ -179,13 +198,14 @@ struct drm_pagemap_addr;
struct drm_gpusvm_ctx;
struct drm_gpusvm_range;
struct xe_bo;
-struct xe_gt;
+struct xe_device;
struct xe_vm;
struct xe_vma;
struct xe_tile;
struct xe_vram_region;
#define XE_INTERCONNECT_VRAM 1
+#define XE_INTERCONNECT_P2P (XE_INTERCONNECT_VRAM + 1)
struct xe_svm_range {
struct {
@@ -260,8 +280,8 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
}
static inline int
-xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
- const struct drm_gpusvm_ctx *ctx)
+xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx,
+ struct drm_pagemap *dpagemap)
{
return -EOPNOTSUPP;
}
@@ -302,7 +322,7 @@ static inline unsigned long xe_svm_range_size(struct xe_svm_range *range)
static inline
bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma,
- u32 region)
+ const struct drm_pagemap *dpagemap)
{
return false;
}
@@ -343,9 +363,30 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
return NULL;
}
+static inline void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem)
+{
+ return NULL;
+}
+
static inline void xe_svm_flush(struct xe_vm *vm)
{
}
+
+static inline int xe_pagemap_shrinker_create(struct xe_device *xe)
+{
+ return 0;
+}
+
+static inline int xe_pagemap_cache_create(struct xe_tile *tile)
+{
+ return 0;
+}
+
+static inline struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
+{
+ return ERR_PTR(-ENOENT);
+}
+
#define xe_svm_range_has_dma_mapping(...) false
#endif /* CONFIG_DRM_XE_GPUSVM */
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 63c060c2ea5c..eb262aad11da 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -6,6 +6,7 @@
#include <linux/fault-inject.h>
#include <drm/drm_managed.h>
+#include <drm/drm_pagemap_util.h>
#include "xe_bo.h"
#include "xe_device.h"
@@ -180,17 +181,19 @@ ALLOW_ERROR_INJECTION(xe_tile_init_early, ERRNO); /* See xe_pci_probe() */
int xe_tile_init_noalloc(struct xe_tile *tile)
{
struct xe_device *xe = tile_to_xe(tile);
+ int err;
xe_wa_apply_tile_workarounds(tile);
- if (xe->info.has_usm && IS_DGFX(xe))
- xe_devm_add(tile, tile->mem.vram);
+ err = xe_pagemap_cache_create(tile);
+ if (err)
+ return err;
if (IS_DGFX(xe) && !ttm_resource_manager_used(&tile->mem.vram->ttm.manager)) {
- int err = xe_ttm_vram_mgr_init(xe, tile->mem.vram);
-
+ err = xe_ttm_vram_mgr_init(xe, tile->mem.vram);
if (err)
return err;
+
xe->info.mem_region_mask |= BIT(tile->mem.vram->id) << 1;
}
@@ -220,3 +223,25 @@ void xe_tile_migrate_wait(struct xe_tile *tile)
{
xe_migrate_wait(tile->migrate);
}
+
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
+/**
+ * xe_tile_local_pagemap() - Return a pointer to the tile's local drm_pagemap if any
+ * @tile: The tile.
+ *
+ * Return: A pointer to the tile's local drm_pagemap if one is currently
+ * active, or NULL otherwise.
+ */
+struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile)
+{
+ struct drm_pagemap *dpagemap =
+ drm_pagemap_get_from_cache_if_active(xe_tile_to_vr(tile)->dpagemap_cache);
+
+ if (dpagemap) {
+ xe_assert(tile_to_xe(tile), kref_read(&dpagemap->ref) >= 2);
+ drm_pagemap_put(dpagemap);
+ }
+
+ return dpagemap;
+}
+#endif
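xe_tile_local_pagemap() drops the reference it obtained before returning, so callers get a borrowed pointer that is only valid while the cache keeps the pagemap active; the xe_assert() checks that at least one other reference remains so the put cannot be the last one. A sketch of the intended call pattern, as used by vm_bind_ioctl_ops_create() below:

	/* Borrowed pointer: no drm_pagemap_put() owed by the caller. */
	struct drm_pagemap *dpagemap = xe_tile_local_pagemap(tile);

	if (!dpagemap)
		return -ENOENT;	/* e.g.: no pagemap currently active */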
diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h
index dceb6297aa01..734132eddda5 100644
--- a/drivers/gpu/drm/xe/xe_tile.h
+++ b/drivers/gpu/drm/xe/xe_tile.h
@@ -8,6 +8,7 @@
#include "xe_device_types.h"
+struct xe_pagemap;
struct xe_tile;
int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id);
@@ -23,4 +24,24 @@ static inline bool xe_tile_is_root(struct xe_tile *tile)
return tile->id == 0;
}
+/**
+ * xe_tile_to_vr() - Return the struct xe_vram_region pointer from a
+ * struct xe_tile pointer
+ * @tile: Pointer to the struct xe_tile.
+ *
+ * Return: Pointer to the struct xe_vram_region used by @tile.
+ */
+static inline struct xe_vram_region *xe_tile_to_vr(struct xe_tile *tile)
+{
+ return tile->mem.vram;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
+struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile);
+#else
+static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile)
+{
+ return NULL;
+}
+#endif
#endif
diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c
index 0d9130b1958a..e120323c43bc 100644
--- a/drivers/gpu/drm/xe/xe_userptr.c
+++ b/drivers/gpu/drm/xe/xe_userptr.c
@@ -55,7 +55,7 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
struct xe_device *xe = vm->xe;
struct drm_gpusvm_ctx ctx = {
.read_only = xe_vma_read_only(vma),
- .device_private_page_owner = xe_svm_devm_owner(xe),
+ .device_private_page_owner = xe_svm_private_page_owner(vm, false),
.allow_mixed = true,
};
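The hmm owner cookie is now computed per VM instead of per device, which is what lets device-private pages of a peer with a fast interconnect be treated as locally mappable. The body of xe_svm_private_page_owner() is outside the hunks shown here; a plausible sketch, assuming the owner comes from the VM's drm_pagemap peer added by this patch:

	/* Plausible sketch only; the real body lives in xe_svm.c. */
	void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem)
	{
		/* A NULL owner makes hmm_range_fault() treat device-private
		 * pages as foreign, forcing a migration to system memory. */
		return force_smem ? NULL : vm->svm.peer.owner;
	}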
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 95e22ff95ea8..a07d8b53de66 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -957,14 +957,37 @@ free_ops:
return fence;
}
+static void xe_vma_mem_attr_fini(struct xe_vma_mem_attr *attr)
+{
+ drm_pagemap_put(attr->preferred_loc.dpagemap);
+}
+
static void xe_vma_free(struct xe_vma *vma)
{
+ xe_vma_mem_attr_fini(&vma->attr);
+
if (xe_vma_is_userptr(vma))
kfree(to_userptr_vma(vma));
else
kfree(vma);
}
+/**
+ * xe_vma_mem_attr_copy() - copy an xe_vma_mem_attr structure.
+ * @to: Destination.
+ * @from: Source.
+ *
+ * Copies an xe_vma_mem_attr structure, taking care to get the reference
+ * counting of individual members right.
+ */
+void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from)
+{
+ xe_vma_mem_attr_fini(to);
+ *to = *from;
+ if (to->preferred_loc.dpagemap)
+ drm_pagemap_get(to->preferred_loc.dpagemap);
+}
+
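xe_vma_mem_attr_copy() first drops any reference held by the destination, then takes one on the source's dpagemap, so temporaries must start zero-initialized and be finalized explicitly. The pairing used by xe_vm_alloc_vma() below, with new_vma as a stand-in name:

	struct xe_vma_mem_attr tmp_attr = {};	/* no stale ref to drop */

	xe_vma_mem_attr_copy(&tmp_attr, &vma->attr);	/* takes a ref */
	/* ... unmap the old VMA, create new_vma ... */
	xe_vma_mem_attr_copy(&new_vma->attr, &tmp_attr);	/* takes a ref */
	xe_vma_mem_attr_fini(&tmp_attr);	/* drop the temporary ref */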
static struct xe_vma *xe_vma_create(struct xe_vm *vm,
struct xe_bo *bo,
u64 bo_offset_or_userptr,
@@ -1015,8 +1038,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
if (vm->xe->info.has_atomic_enable_pte_bit)
vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
- vma->attr = *attr;
-
+ xe_vma_mem_attr_copy(&vma->attr, attr);
if (bo) {
struct drm_gpuvm_bo *vm_bo;
@@ -2320,7 +2342,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
struct xe_tile *tile;
struct xe_svm_range *svm_range;
struct drm_gpusvm_ctx ctx = {};
- struct drm_pagemap *dpagemap;
+ struct drm_pagemap *dpagemap = NULL;
u8 id, tile_mask = 0;
u32 i;
@@ -2338,23 +2360,17 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
op->prefetch_range.ranges_count = 0;
- tile = NULL;
if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
dpagemap = xe_vma_resolve_pagemap(vma,
xe_device_get_root_tile(vm->xe));
- /*
- * TODO: Once multigpu support is enabled will need
- * something to dereference tile from dpagemap.
- */
- if (dpagemap)
- tile = xe_device_get_root_tile(vm->xe);
} else if (prefetch_region) {
tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
XE_PL_VRAM0];
+ dpagemap = xe_tile_local_pagemap(tile);
}
- op->prefetch_range.tile = tile;
+ op->prefetch_range.dpagemap = dpagemap;
alloc_next_range:
svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
@@ -2373,7 +2389,7 @@ alloc_next_range:
goto unwind_prefetch_ops;
}
- if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) {
+ if (xe_svm_range_validate(vm, svm_range, tile_mask, dpagemap)) {
xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
goto check_next_range;
}
@@ -2895,7 +2911,7 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
{
bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
- struct xe_tile *tile = op->prefetch_range.tile;
+ struct drm_pagemap *dpagemap = op->prefetch_range.dpagemap;
int err = 0;
struct xe_svm_range *svm_range;
@@ -2908,15 +2924,22 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
ctx.read_only = xe_vma_read_only(vma);
ctx.devmem_possible = devmem_possible;
ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
- ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe);
+ ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap);
/* TODO: Threading the migration */
xa_for_each(&op->prefetch_range.range, i, svm_range) {
- if (!tile)
+ if (!dpagemap)
xe_svm_range_migrate_to_smem(vm, svm_range);
- if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) {
- err = xe_svm_alloc_vram(tile, svm_range, &ctx);
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) {
+ drm_dbg(&vm->xe->drm,
+ "Prefetch pagemap is %s start 0x%016lx end 0x%016lx\n",
+ dpagemap ? dpagemap->drm->unique : "system",
+ xe_svm_range_start(svm_range), xe_svm_range_end(svm_range));
+ }
+
+ if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, dpagemap)) {
+ err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap);
if (err) {
drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
@@ -4324,7 +4347,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm,
struct drm_gpuva_op *__op;
unsigned int vma_flags = 0;
bool remap_op = false;
- struct xe_vma_mem_attr tmp_attr;
+ struct xe_vma_mem_attr tmp_attr = {};
u16 default_pat;
int err;
@@ -4419,7 +4442,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm,
* VMA, so they can be assigned to newly MAP created vma.
*/
if (is_madvise)
- tmp_attr = vma->attr;
+ xe_vma_mem_attr_copy(&tmp_attr, &vma->attr);
xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
} else if (__op->op == DRM_GPUVA_OP_MAP) {
@@ -4429,12 +4452,13 @@ static int xe_vm_alloc_vma(struct xe_vm *vm,
* copy them to new vma.
*/
if (is_madvise)
- vma->attr = tmp_attr;
+ xe_vma_mem_attr_copy(&vma->attr, &tmp_attr);
}
}
xe_vm_unlock(vm);
drm_gpuva_ops_free(&vm->gpuvm, ops);
+ xe_vma_mem_attr_fini(&tmp_attr);
return 0;
unwind_ops:
@@ -4532,3 +4556,3 @@ int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t r
return xe_vm_alloc_vma(vm, &map_req, false);
}
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 361f10b3c453..7d11ca47d73e 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -414,4 +414,5 @@ static inline struct drm_exec *xe_vm_validation_exec(struct xe_vm *vm)
#define xe_vm_has_valid_gpu_mapping(tile, tile_present, tile_invalidated) \
((READ_ONCE(tile_present) & ~READ_ONCE(tile_invalidated)) & BIT((tile)->id))
+void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from);
#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index cad3cf627c3f..add9a6ca2390 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -22,6 +22,19 @@ struct xe_vmas_in_madvise_range {
bool has_svm_userptr_vmas;
};
+/**
+ * struct xe_madvise_details - Argument to madvise_funcs
+ * @dpagemap: Reference-counted pointer to a struct drm_pagemap.
+ *
+ * In addition to the user-space args, the madvise IOCTL handler may
+ * have additional info to pass into the madvise_func that handles the
+ * madvise type. Use a struct xe_madvise_details for that and extend
+ * the struct as necessary.
+ */
+struct xe_madvise_details {
+ struct drm_pagemap *dpagemap;
+};
+
static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
{
u64 addr = madvise_range->addr;
@@ -74,34 +87,41 @@ static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_r
static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
struct xe_vma **vmas, int num_vmas,
- struct drm_xe_madvise *op)
+ struct drm_xe_madvise *op,
+ struct xe_madvise_details *details)
{
int i;
xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC);
for (i = 0; i < num_vmas; i++) {
+ struct xe_vma *vma = vmas[i];
+ struct xe_vma_preferred_loc *loc = &vma->attr.preferred_loc;
+
/*TODO: Extend attributes to bo based vmas */
- if ((vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd &&
- vmas[i]->attr.preferred_loc.migration_policy ==
- op->preferred_mem_loc.migration_policy) ||
- !xe_vma_is_cpu_addr_mirror(vmas[i])) {
- vmas[i]->skip_invalidation = true;
+ if ((loc->devmem_fd == op->preferred_mem_loc.devmem_fd &&
+ loc->migration_policy == op->preferred_mem_loc.migration_policy) ||
+ !xe_vma_is_cpu_addr_mirror(vma)) {
+ vma->skip_invalidation = true;
} else {
- vmas[i]->skip_invalidation = false;
- vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd;
+ vma->skip_invalidation = false;
+ loc->devmem_fd = op->preferred_mem_loc.devmem_fd;
/* Until multi-device support is added, migration_policy
 * is unused and can be ignored.
 */
- vmas[i]->attr.preferred_loc.migration_policy =
- op->preferred_mem_loc.migration_policy;
+ loc->migration_policy = op->preferred_mem_loc.migration_policy;
+ drm_pagemap_put(loc->dpagemap);
+ loc->dpagemap = NULL;
+ if (details->dpagemap)
+ loc->dpagemap = drm_pagemap_get(details->dpagemap);
}
}
}
static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
struct xe_vma **vmas, int num_vmas,
- struct drm_xe_madvise *op)
+ struct drm_xe_madvise *op,
+ struct xe_madvise_details *details)
{
struct xe_bo *bo;
int i;
@@ -142,7 +162,8 @@ static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
struct xe_vma **vmas, int num_vmas,
- struct drm_xe_madvise *op)
+ struct drm_xe_madvise *op,
+ struct xe_madvise_details *details)
{
int i;
@@ -160,7 +181,8 @@ static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
struct xe_vma **vmas, int num_vmas,
- struct drm_xe_madvise *op);
+ struct drm_xe_madvise *op,
+ struct xe_madvise_details *details);
static const madvise_func madvise_funcs[] = {
[DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
@@ -244,11 +266,12 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv
if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM))
return false;
- if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
- DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
+ if (XE_IOCTL_DBG(xe, fd <= DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
+ args->preferred_mem_loc.region_instance != 0))
return false;
- if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad))
+ if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
+ DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
return false;
if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
@@ -294,6 +317,41 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv
return true;
}
+static int xe_madvise_details_init(struct xe_vm *vm, const struct drm_xe_madvise *args,
+ struct xe_madvise_details *details)
+{
+ struct xe_device *xe = vm->xe;
+
+ memset(details, 0, sizeof(*details));
+
+ if (args->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC) {
+ int fd = args->preferred_mem_loc.devmem_fd;
+ struct drm_pagemap *dpagemap;
+
+ if (fd <= 0)
+ return 0;
+
+ dpagemap = xe_drm_pagemap_from_fd(args->preferred_mem_loc.devmem_fd,
+ args->preferred_mem_loc.region_instance);
+ if (XE_IOCTL_DBG(xe, IS_ERR(dpagemap)))
+ return PTR_ERR(dpagemap);
+
+ /* Don't allow a foreign placement without a fast interconnect! */
+ if (XE_IOCTL_DBG(xe, dpagemap->pagemap->owner != vm->svm.peer.owner)) {
+ drm_pagemap_put(dpagemap);
+ return -ENOLINK;
+ }
+ details->dpagemap = dpagemap;
+ }
+
+ return 0;
+}
+
+static void xe_madvise_details_fini(struct xe_madvise_details *details)
+{
+ drm_pagemap_put(details->dpagemap);
+}
+
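On success xe_madvise_details_init() holds at most one pagemap reference (none for system placement), and it fails with -ENOLINK when the resolved pagemap's owner differs from the VM's peer owner, i.e. when there is no fast interconnect. All ioctl exit paths below therefore funnel through a single fini:

	struct xe_madvise_details details;

	err = xe_madvise_details_init(vm, args, &details);
	if (err)
		goto unlock_vm;

	/* ... look up VMAs, run madvise_funcs[attr_type](..., &details) ... */

	xe_madvise_details_fini(&details);	/* drops details.dpagemap, if any */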
static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
int num_vmas, u32 atomic_val)
{
@@ -347,6 +405,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
struct drm_xe_madvise *args = data;
struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
.range = args->range, };
+ struct xe_madvise_details details;
struct xe_vm *vm;
struct drm_exec exec;
int err, attr_type;
@@ -371,13 +430,17 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
goto unlock_vm;
}
- err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
+ err = xe_madvise_details_init(vm, args, &details);
if (err)
goto unlock_vm;
+ err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
+ if (err)
+ goto madv_fini;
+
err = get_vmas(vm, &madvise_range);
if (err || !madvise_range.num_vmas)
- goto unlock_vm;
+ goto madv_fini;
if (madvise_range.has_bo_vmas) {
if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
@@ -385,7 +448,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
madvise_range.num_vmas,
args->atomic.val)) {
err = -EINVAL;
- goto unlock_vm;
+ goto madv_fini;
}
}
@@ -411,7 +474,8 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
}
attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
- madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args);
+ madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args,
+ &details);
err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);
@@ -423,6 +487,8 @@ err_fini:
drm_exec_fini(&exec);
kfree(madvise_range.vmas);
madvise_range.vmas = NULL;
+madv_fini:
+ xe_madvise_details_fini(&details);
unlock_vm:
up_write(&vm->lock);
put_vm:
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 18bad1dd08e6..437f64202f3b 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -8,6 +8,7 @@
#include <drm/drm_gpusvm.h>
#include <drm/drm_gpuvm.h>
+#include <drm/drm_pagemap_util.h>
#include <linux/dma-resv.h>
#include <linux/kref.h>
@@ -19,6 +20,8 @@
#include "xe_range_fence.h"
#include "xe_userptr.h"
+struct drm_pagemap;
+
struct xe_bo;
struct xe_svm_range;
struct xe_sync_entry;
@@ -53,7 +56,7 @@ struct xe_vm_pgtable_update_op;
*/
struct xe_vma_mem_attr {
/** @preferred_loc: preferred memory_location */
- struct {
+ struct xe_vma_preferred_loc {
/** @preferred_loc.migration_policy: Pages migration policy */
u32 migration_policy;
@@ -64,6 +67,13 @@ struct xe_vma_mem_attr {
* closest device memory respectively.
*/
u32 devmem_fd;
+ /**
+ * @preferred_loc.dpagemap: Reference-counted pointer to the drm_pagemap preferred
+ * for migration on an SVM page-fault. The pointer is protected by the
+ * vm lock, and is %NULL if @devmem_fd should be consulted for special
+ * values.
+ */
+ struct drm_pagemap *dpagemap;
} preferred_loc;
/**
@@ -191,6 +201,9 @@ struct xe_vm {
*/
struct work_struct work;
} garbage_collector;
+ struct xe_pagemap *pagemaps[XE_MAX_TILES_PER_DEVICE];
+ /** @svm.peer: Used for pagemap connectivity computations. */
+ struct drm_pagemap_peer peer;
} svm;
struct xe_device *xe;
@@ -395,10 +408,10 @@ struct xe_vma_op_prefetch_range {
/** @ranges_count: number of svm ranges to map */
u32 ranges_count;
/**
- * @tile: Pointer to the tile structure containing memory to prefetch.
- * NULL if prefetch requested region is smem
+ * @dpagemap: Pointer to the drm_pagemap of the memory to prefetch into.
+ * NULL if the requested prefetch region is smem.
*/
- struct xe_tile *tile;
+ struct drm_pagemap *dpagemap;
};
/** enum xe_vma_op_flags - flags for VMA operation */
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c
index 1b9e9b028975..c64d98bf1723 100644
--- a/drivers/gpu/drm/xe/xe_vram.c
+++ b/drivers/gpu/drm/xe/xe_vram.c
@@ -25,97 +25,6 @@
#include "xe_vram.h"
#include "xe_vram_types.h"
-static void resize_bar(struct xe_device *xe, int resno, resource_size_t size)
-{
- struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- int bar_size = pci_rebar_bytes_to_size(size);
- int ret;
-
- ret = pci_resize_resource(pdev, resno, bar_size, 0);
- if (ret) {
- drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n",
- resno, 1 << bar_size, ERR_PTR(ret));
- return;
- }
-
- drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size);
-}
-
-/*
- * if force_vram_bar_size is set, attempt to set to the requested size
- * else set to maximum possible size
- */
-void xe_vram_resize_bar(struct xe_device *xe)
-{
- int force_vram_bar_size = xe_modparam.force_vram_bar_size;
- struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- struct pci_bus *root = pdev->bus;
- resource_size_t current_size;
- resource_size_t rebar_size;
- struct resource *root_res;
- int max_size, i;
- u32 pci_cmd;
-
- /* gather some relevant info */
- current_size = pci_resource_len(pdev, LMEM_BAR);
-
- if (force_vram_bar_size < 0)
- return;
-
- /* set to a specific size? */
- if (force_vram_bar_size) {
- rebar_size = pci_rebar_bytes_to_size(force_vram_bar_size *
- (resource_size_t)SZ_1M);
-
- if (!pci_rebar_size_supported(pdev, LMEM_BAR, rebar_size)) {
- drm_info(&xe->drm,
- "Requested size: %lluMiB is not supported by rebar sizes: 0x%llx. Leaving default: %lluMiB\n",
- (u64)pci_rebar_size_to_bytes(rebar_size) >> 20,
- pci_rebar_get_possible_sizes(pdev, LMEM_BAR),
- (u64)current_size >> 20);
- return;
- }
-
- rebar_size = pci_rebar_size_to_bytes(rebar_size);
- if (rebar_size == current_size)
- return;
- } else {
- max_size = pci_rebar_get_max_size(pdev, LMEM_BAR);
- if (max_size < 0)
- return;
- rebar_size = pci_rebar_size_to_bytes(max_size);
-
- /* only resize if larger than current */
- if (rebar_size <= current_size)
- return;
- }
-
- drm_info(&xe->drm, "Attempting to resize bar from %lluMiB -> %lluMiB\n",
- (u64)current_size >> 20, (u64)rebar_size >> 20);
-
- while (root->parent)
- root = root->parent;
-
- pci_bus_for_each_resource(root, root_res, i) {
- if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
- (u64)root_res->start > 0x100000000ul)
- break;
- }
-
- if (!root_res) {
- drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing. Consider enabling 'Resizable BAR' support in your BIOS\n");
- return;
- }
-
- pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
- pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
-
- resize_bar(xe, LMEM_BAR, rebar_size);
-
- pci_assign_unassigned_bus_resources(pdev->bus);
- pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
-}
-
static bool resource_is_valid(struct pci_dev *pdev, int bar)
{
if (!pci_resource_flags(pdev, bar))
diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h
index 13505cfb184d..72860f714fc6 100644
--- a/drivers/gpu/drm/xe/xe_vram.h
+++ b/drivers/gpu/drm/xe/xe_vram.h
@@ -11,7 +11,6 @@
struct xe_device;
struct xe_vram_region;
-void xe_vram_resize_bar(struct xe_device *xe);
int xe_vram_probe(struct xe_device *xe);
struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement);
diff --git a/drivers/gpu/drm/xe/xe_vram_types.h b/drivers/gpu/drm/xe/xe_vram_types.h
index 83772dcbf1af..646e3c12ae9f 100644
--- a/drivers/gpu/drm/xe/xe_vram_types.h
+++ b/drivers/gpu/drm/xe/xe_vram_types.h
@@ -66,19 +66,8 @@ struct xe_vram_region {
#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
/** @migrate: Back pointer to migrate */
struct xe_migrate *migrate;
- /** @pagemap: Used to remap device memory as ZONE_DEVICE */
- struct dev_pagemap pagemap;
- /**
- * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory
- * pages of this tile.
- */
- struct drm_pagemap dpagemap;
- /**
- * @hpa_base: base host physical address
- *
- * This is generated when remap device memory as ZONE_DEVICE
- */
- resource_size_t hpa_base;
+ /** @dpagemap_cache: drm_pagemap cache. */
+ struct drm_pagemap_cache *dpagemap_cache;
#endif
};
diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c
index 8f23a27871b6..c83ea3d48fae 100644
--- a/drivers/gpu/drm/xe/xe_vsec.c
+++ b/drivers/gpu/drm/xe/xe_vsec.c
@@ -158,13 +158,15 @@ int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_off
guard(mutex)(&xe->pmt.lock);
+ if (!xe->soc_remapper.set_telem_region)
+ return -ENODEV;
+
/* indicate that we are not at an appropriate power level */
if (!xe_pm_runtime_get_if_active(xe))
return -ENODATA;
/* set SoC re-mapper index register based on GUID memory region */
- xe_mmio_rmw32(xe_root_tile_mmio(xe), SG_REMAP_INDEX1, SG_REMAP_BITS,
- REG_FIELD_PREP(SG_REMAP_BITS, mem_region));
+ xe->soc_remapper.set_telem_region(xe, mem_region);
memcpy_fromio(data, telem_addr, count);
xe_pm_runtime_put(xe);
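The direct register write is replaced by an optional per-platform hook (see the new xe_soc_remapper.c in the diffstat), so platforms without a remapper leave it NULL and the read bails out with -ENODEV before touching hardware. A minimal sketch of a platform implementation that preserves the removed behaviour, with a hypothetical function name:

	/* Hypothetical hook: program the SoC re-mapper index register
	 * exactly as the removed open-coded sequence did. */
	static void xe_remapper_set_telem_region(struct xe_device *xe, u32 mem_region)
	{
		xe_mmio_rmw32(xe_root_tile_mmio(xe), SG_REMAP_INDEX1, SG_REMAP_BITS,
			      REG_FIELD_PREP(SG_REMAP_BITS, mem_region));
	}

installed at init time with xe->soc_remapper.set_telem_region = xe_remapper_set_telem_region;.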