| author | Dave Airlie <airlied@redhat.com> | 2026-01-01 17:00:22 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2026-01-01 17:00:59 +1000 |
| commit | 59260fe5821ad108d0fda8a4a4fe0448e9821f27 (patch) | |
| tree | b6581b5aa5cae8b4fc1f21761211a87eaa5a6dc6 /drivers/gpu/drm | |
| parent | 9ec3c8ee16a07dff8be82aba595dd77c134c03c2 (diff) | |
| parent | 0b075f82935e82fc9fff90d06d2a161caaebd9c3 (diff) | |
Merge tag 'drm-xe-next-2025-12-30' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
Core Changes:
- Dynamic pagemaps and multi-device SVM (Thomas)
Driver Changes:
- Introduce SR-IOV scheduler groups (Daniele)
- Configure migration queue as low latency (Francois)
- Don't use absolute path in generated header comment (Calvin Owens)
- Add SoC remapper support for system controller (Umesh)
- Insert compiler barriers in GuC code (Jonathan)
- ReBAR (Resizable BAR) updates (Lucas)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/aVOiULyYdnFbq-JB@fedora
Diffstat (limited to 'drivers/gpu/drm')
58 files changed, 3622 insertions, 473 deletions
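The series below reworks the drm_pagemap migration entry points: drm_pagemap_migrate_to_devmem() now takes a struct drm_pagemap_migrate_details instead of the old timeslice_ms/pgmap_owner arguments, and drm_pagemap_devmem_init() gains a pre_migrate_fence parameter. A minimal, hedged sketch of how a driver caller might adapt — my_allocation, my_devmem_ops and the chosen field values are hypothetical; the drm_pagemap_* names and fields are taken from the diff:

```c
/*
 * Hedged caller sketch only. Anything prefixed my_* is hypothetical;
 * the drm_pagemap_* symbols and struct fields appear in the diff below.
 */
#include <drm/drm_pagemap.h>

static int my_migrate_to_vram(struct my_allocation *alloc,
			      struct drm_pagemap *dpagemap,
			      struct mm_struct *mm,
			      unsigned long start, unsigned long end)
{
	/* Replaces the old timeslice_ms/pgmap_owner arguments. */
	const struct drm_pagemap_migrate_details mdetails = {
		.timeslice_ms = 5,		   /* keep pages in devmem for at least 5 ms */
		.can_migrate_same_pagemap = false, /* skip pages already backed by this pagemap */
		.source_peer_migrates = false,	   /* the destination device performs the copies */
	};

	/* pre_migrate_fence (last argument) is new; NULL when there is nothing to wait for. */
	drm_pagemap_devmem_init(&alloc->devmem, dpagemap->drm->dev, mm,
				&my_devmem_ops, dpagemap, end - start, NULL);

	/* Per the updated kernel-doc, the devmem reference is consumed even on error. */
	return drm_pagemap_migrate_to_devmem(&alloc->devmem, mm, start, end, &mdetails);
}
```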
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 0e1c668b46d2..0deee72ef935 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -109,7 +109,8 @@ obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o drm_gpusvm_helper-y := \ drm_gpusvm.o\ - drm_pagemap.o + drm_pagemap.o\ + drm_pagemap_util.o obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 39c8c50401dd..aa9a0b60e727 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -744,6 +744,127 @@ err_free: } /** + * drm_gpusvm_scan_mm() - Check the migration state of a drm_gpusvm_range + * @range: Pointer to the struct drm_gpusvm_range to check. + * @dev_private_owner: The struct dev_private_owner to use to determine + * compatible device-private pages. + * @pagemap: The struct dev_pagemap pointer to use for pagemap-specific + * checks. + * + * Scan the CPU address space corresponding to @range and return the + * current migration state. Note that the result may be invalid as + * soon as the function returns. It's an advisory check. + * + * TODO: Bail early and call hmm_range_fault() for subranges. + * + * Return: See &enum drm_gpusvm_scan_result. + */ +enum drm_gpusvm_scan_result drm_gpusvm_scan_mm(struct drm_gpusvm_range *range, + void *dev_private_owner, + const struct dev_pagemap *pagemap) +{ + struct mmu_interval_notifier *notifier = &range->notifier->notifier; + unsigned long start = drm_gpusvm_range_start(range); + unsigned long end = drm_gpusvm_range_end(range); + struct hmm_range hmm_range = { + .default_flags = 0, + .notifier = notifier, + .start = start, + .end = end, + .dev_private_owner = dev_private_owner, + }; + unsigned long timeout = + jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); + enum drm_gpusvm_scan_result state = DRM_GPUSVM_SCAN_UNPOPULATED, new_state; + unsigned long *pfns; + unsigned long npages = npages_in_range(start, end); + const struct dev_pagemap *other = NULL; + int err, i; + + pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); + if (!pfns) + return DRM_GPUSVM_SCAN_UNPOPULATED; + + hmm_range.hmm_pfns = pfns; + +retry: + hmm_range.notifier_seq = mmu_interval_read_begin(notifier); + mmap_read_lock(range->gpusvm->mm); + + while (true) { + err = hmm_range_fault(&hmm_range); + if (err == -EBUSY) { + if (time_after(jiffies, timeout)) + break; + + hmm_range.notifier_seq = + mmu_interval_read_begin(notifier); + continue; + } + break; + } + mmap_read_unlock(range->gpusvm->mm); + if (err) + goto err_free; + + drm_gpusvm_notifier_lock(range->gpusvm); + if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) { + drm_gpusvm_notifier_unlock(range->gpusvm); + goto retry; + } + + for (i = 0; i < npages;) { + struct page *page; + const struct dev_pagemap *cur = NULL; + + if (!(pfns[i] & HMM_PFN_VALID)) { + state = DRM_GPUSVM_SCAN_UNPOPULATED; + goto err_free; + } + + page = hmm_pfn_to_page(pfns[i]); + if (is_device_private_page(page) || + is_device_coherent_page(page)) + cur = page_pgmap(page); + + if (cur == pagemap) { + new_state = DRM_GPUSVM_SCAN_EQUAL; + } else if (cur && (cur == other || !other)) { + new_state = DRM_GPUSVM_SCAN_OTHER; + other = cur; + } else if (cur) { + new_state = DRM_GPUSVM_SCAN_MIXED_DEVICE; + } else { + new_state = DRM_GPUSVM_SCAN_SYSTEM; + } + + /* + * TODO: Could use an array for state + * transitions, and caller might want it + * to bail early for some results. 
+ */ + if (state == DRM_GPUSVM_SCAN_UNPOPULATED) { + state = new_state; + } else if (state != new_state) { + if (new_state == DRM_GPUSVM_SCAN_SYSTEM || + state == DRM_GPUSVM_SCAN_SYSTEM) + state = DRM_GPUSVM_SCAN_MIXED; + else if (state != DRM_GPUSVM_SCAN_MIXED) + state = DRM_GPUSVM_SCAN_MIXED_DEVICE; + } + + i += 1ul << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); + } + +err_free: + drm_gpusvm_notifier_unlock(range->gpusvm); + + kvfree(pfns); + return state; +} +EXPORT_SYMBOL(drm_gpusvm_scan_mm); + +/** * drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range * @gpusvm: Pointer to the GPU SVM structure * @notifier: Pointer to the GPU SVM notifier structure @@ -1038,6 +1159,7 @@ static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm, flags.has_dma_mapping = false; WRITE_ONCE(svm_pages->flags.__flags, flags.__flags); + drm_pagemap_put(svm_pages->dpagemap); svm_pages->dpagemap = NULL; } } @@ -1434,6 +1556,8 @@ map_pages: if (pagemap) { flags.has_devmem_pages = true; + drm_pagemap_get(dpagemap); + drm_pagemap_put(svm_pages->dpagemap); svm_pages->dpagemap = dpagemap; } diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index 37d7cfbbb3e8..ba099aa7c52f 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c @@ -3,11 +3,14 @@ * Copyright © 2024-2025 Intel Corporation */ +#include <linux/dma-fence.h> #include <linux/dma-mapping.h> #include <linux/migrate.h> #include <linux/pagemap.h> #include <drm/drm_drv.h> #include <drm/drm_pagemap.h> +#include <drm/drm_pagemap_util.h> +#include <drm/drm_print.h> /** * DOC: Overview @@ -62,7 +65,7 @@ * * @refcount: Reference count for the zdd * @devmem_allocation: device memory allocation - * @device_private_page_owner: Device private pages owner + * @dpagemap: Refcounted pointer to the underlying struct drm_pagemap. * * This structure serves as a generic wrapper installed in * page->zone_device_data. It provides infrastructure for looking up a device @@ -74,12 +77,12 @@ struct drm_pagemap_zdd { struct kref refcount; struct drm_pagemap_devmem *devmem_allocation; - void *device_private_page_owner; + struct drm_pagemap *dpagemap; }; /** * drm_pagemap_zdd_alloc() - Allocate a zdd structure. - * @device_private_page_owner: Device private pages owner + * @dpagemap: Pointer to the underlying struct drm_pagemap. * * This function allocates and initializes a new zdd structure. It sets up the * reference count and initializes the destroy work. @@ -87,7 +90,7 @@ struct drm_pagemap_zdd { * Return: Pointer to the allocated zdd on success, ERR_PTR() on failure. 
*/ static struct drm_pagemap_zdd * -drm_pagemap_zdd_alloc(void *device_private_page_owner) +drm_pagemap_zdd_alloc(struct drm_pagemap *dpagemap) { struct drm_pagemap_zdd *zdd; @@ -97,7 +100,7 @@ drm_pagemap_zdd_alloc(void *device_private_page_owner) kref_init(&zdd->refcount); zdd->devmem_allocation = NULL; - zdd->device_private_page_owner = device_private_page_owner; + zdd->dpagemap = drm_pagemap_get(dpagemap); return zdd; } @@ -127,6 +130,7 @@ static void drm_pagemap_zdd_destroy(struct kref *ref) struct drm_pagemap_zdd *zdd = container_of(ref, struct drm_pagemap_zdd, refcount); struct drm_pagemap_devmem *devmem = zdd->devmem_allocation; + struct drm_pagemap *dpagemap = zdd->dpagemap; if (devmem) { complete_all(&devmem->detached); @@ -134,6 +138,7 @@ static void drm_pagemap_zdd_destroy(struct kref *ref) devmem->ops->devmem_release(devmem); } kfree(zdd); + drm_pagemap_put(dpagemap); } /** @@ -201,11 +206,13 @@ static void drm_pagemap_get_devmem_page(struct page *page, /** * drm_pagemap_migrate_map_pages() - Map migration pages for GPU SVM migration - * @dev: The device for which the pages are being mapped - * @pagemap_addr: Array to store DMA information corresponding to mapped pages - * @migrate_pfn: Array of migrate page frame numbers to map - * @npages: Number of pages to map + * @dev: The device performing the migration. + * @local_dpagemap: The drm_pagemap local to the migrating device. + * @pagemap_addr: Array to store DMA information corresponding to mapped pages. + * @migrate_pfn: Array of page frame numbers of system pages or peer pages to map. + * @npages: Number of system pages or peer pages to map. * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL) + * @mdetails: Details governing the migration behaviour. * * This function maps pages of memory for migration usage in GPU SVM. It * iterates over each page frame number provided in @migrate_pfn, maps the @@ -215,12 +222,14 @@ static void drm_pagemap_get_devmem_page(struct page *page, * Returns: 0 on success, -EFAULT if an error occurs during mapping. 
*/ static int drm_pagemap_migrate_map_pages(struct device *dev, + struct drm_pagemap *local_dpagemap, struct drm_pagemap_addr *pagemap_addr, unsigned long *migrate_pfn, unsigned long npages, - enum dma_data_direction dir) + enum dma_data_direction dir, + const struct drm_pagemap_migrate_details *mdetails) { - unsigned long i; + unsigned long num_peer_pages = 0, num_local_pages = 0, i; for (i = 0; i < npages;) { struct page *page = migrate_pfn_to_page(migrate_pfn[i]); @@ -231,31 +240,58 @@ static int drm_pagemap_migrate_map_pages(struct device *dev, if (!page) goto next; - if (WARN_ON_ONCE(is_zone_device_page(page))) - return -EFAULT; - folio = page_folio(page); order = folio_order(folio); - dma_addr = dma_map_page(dev, page, 0, page_size(page), dir); - if (dma_mapping_error(dev, dma_addr)) - return -EFAULT; - - pagemap_addr[i] = - drm_pagemap_addr_encode(dma_addr, - DRM_INTERCONNECT_SYSTEM, - order, dir); + if (is_device_private_page(page)) { + struct drm_pagemap_zdd *zdd = page->zone_device_data; + struct drm_pagemap *dpagemap = zdd->dpagemap; + struct drm_pagemap_addr addr; + + if (dpagemap == local_dpagemap) { + if (!mdetails->can_migrate_same_pagemap) + goto next; + + num_local_pages += NR_PAGES(order); + } else { + num_peer_pages += NR_PAGES(order); + } + + addr = dpagemap->ops->device_map(dpagemap, dev, page, order, dir); + if (dma_mapping_error(dev, addr.addr)) + return -EFAULT; + + pagemap_addr[i] = addr; + } else { + dma_addr = dma_map_page(dev, page, 0, page_size(page), dir); + if (dma_mapping_error(dev, dma_addr)) + return -EFAULT; + + pagemap_addr[i] = + drm_pagemap_addr_encode(dma_addr, + DRM_INTERCONNECT_SYSTEM, + order, dir); + } next: i += NR_PAGES(order); } + if (num_peer_pages) + drm_dbg(local_dpagemap->drm, "Migrating %lu peer pages over interconnect.\n", + num_peer_pages); + if (num_local_pages) + drm_dbg(local_dpagemap->drm, "Migrating %lu local pages over interconnect.\n", + num_local_pages); + return 0; } /** * drm_pagemap_migrate_unmap_pages() - Unmap pages previously mapped for GPU SVM migration * @dev: The device for which the pages were mapped + * @migrate_pfn: Array of migrate pfns set up for the mapped pages. Used to + * determine the drm_pagemap of a peer device private page. 
* @pagemap_addr: Array of DMA information corresponding to mapped pages * @npages: Number of pages to unmap * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL) @@ -266,16 +302,27 @@ next: */ static void drm_pagemap_migrate_unmap_pages(struct device *dev, struct drm_pagemap_addr *pagemap_addr, + unsigned long *migrate_pfn, unsigned long npages, enum dma_data_direction dir) { unsigned long i; for (i = 0; i < npages;) { - if (!pagemap_addr[i].addr || dma_mapping_error(dev, pagemap_addr[i].addr)) + struct page *page = migrate_pfn_to_page(migrate_pfn[i]); + + if (!page || !pagemap_addr[i].addr || dma_mapping_error(dev, pagemap_addr[i].addr)) goto next; - dma_unmap_page(dev, pagemap_addr[i].addr, PAGE_SIZE << pagemap_addr[i].order, dir); + if (is_zone_device_page(page)) { + struct drm_pagemap_zdd *zdd = page->zone_device_data; + struct drm_pagemap *dpagemap = zdd->dpagemap; + + dpagemap->ops->device_unmap(dpagemap, dev, pagemap_addr[i]); + } else { + dma_unmap_page(dev, pagemap_addr[i].addr, + PAGE_SIZE << pagemap_addr[i].order, dir); + } next: i += NR_PAGES(pagemap_addr[i].order); @@ -288,18 +335,125 @@ npages_in_range(unsigned long start, unsigned long end) return (end - start) >> PAGE_SHIFT; } +static int +drm_pagemap_migrate_remote_to_local(struct drm_pagemap_devmem *devmem, + struct device *remote_device, + struct drm_pagemap *remote_dpagemap, + unsigned long local_pfns[], + struct page *remote_pages[], + struct drm_pagemap_addr pagemap_addr[], + unsigned long npages, + const struct drm_pagemap_devmem_ops *ops, + const struct drm_pagemap_migrate_details *mdetails) + +{ + int err = drm_pagemap_migrate_map_pages(remote_device, remote_dpagemap, + pagemap_addr, local_pfns, + npages, DMA_FROM_DEVICE, mdetails); + + if (err) + goto out; + + err = ops->copy_to_ram(remote_pages, pagemap_addr, npages, + devmem->pre_migrate_fence); +out: + drm_pagemap_migrate_unmap_pages(remote_device, pagemap_addr, local_pfns, + npages, DMA_FROM_DEVICE); + return err; +} + +static int +drm_pagemap_migrate_sys_to_dev(struct drm_pagemap_devmem *devmem, + unsigned long sys_pfns[], + struct page *local_pages[], + struct drm_pagemap_addr pagemap_addr[], + unsigned long npages, + const struct drm_pagemap_devmem_ops *ops, + const struct drm_pagemap_migrate_details *mdetails) +{ + int err = drm_pagemap_migrate_map_pages(devmem->dev, devmem->dpagemap, + pagemap_addr, sys_pfns, npages, + DMA_TO_DEVICE, mdetails); + + if (err) + goto out; + + err = ops->copy_to_devmem(local_pages, pagemap_addr, npages, + devmem->pre_migrate_fence); +out: + drm_pagemap_migrate_unmap_pages(devmem->dev, pagemap_addr, sys_pfns, npages, + DMA_TO_DEVICE); + return err; +} + +/** + * struct migrate_range_loc - Cursor into the loop over migrate_pfns for migrating to + * device. + * @start: The current loop index. + * @device: migrating device. + * @dpagemap: Pointer to struct drm_pagemap used by the migrating device. + * @ops: The copy ops to be used for the migrating device. 
+ */ +struct migrate_range_loc { + unsigned long start; + struct device *device; + struct drm_pagemap *dpagemap; + const struct drm_pagemap_devmem_ops *ops; +}; + +static int drm_pagemap_migrate_range(struct drm_pagemap_devmem *devmem, + unsigned long src_pfns[], + unsigned long dst_pfns[], + struct page *pages[], + struct drm_pagemap_addr pagemap_addr[], + struct migrate_range_loc *last, + const struct migrate_range_loc *cur, + const struct drm_pagemap_migrate_details *mdetails) +{ + int ret = 0; + + if (cur->start == 0) + goto out; + + if (cur->start <= last->start) + return 0; + + if (cur->dpagemap == last->dpagemap && cur->ops == last->ops) + return 0; + + if (last->dpagemap) + ret = drm_pagemap_migrate_remote_to_local(devmem, + last->device, + last->dpagemap, + &dst_pfns[last->start], + &pages[last->start], + &pagemap_addr[last->start], + cur->start - last->start, + last->ops, mdetails); + + else + ret = drm_pagemap_migrate_sys_to_dev(devmem, + &src_pfns[last->start], + &pages[last->start], + &pagemap_addr[last->start], + cur->start - last->start, + last->ops, mdetails); + +out: + *last = *cur; + return ret; +} + /** * drm_pagemap_migrate_to_devmem() - Migrate a struct mm_struct range to device memory * @devmem_allocation: The device memory allocation to migrate to. * The caller should hold a reference to the device memory allocation, - * and the reference is consumed by this function unless it returns with + * and the reference is consumed by this function even if it returns with * an error. * @mm: Pointer to the struct mm_struct. * @start: Start of the virtual address range to migrate. * @end: End of the virtual address range to migrate. - * @timeslice_ms: The time requested for the migrated pagemap pages to - * be present in @mm before being allowed to be migrated back. - * @pgmap_owner: Not used currently, since only system memory is considered. + * @mdetails: Details to govern the migration. * * This function migrates the specified virtual address range to device memory. 
* It performs the necessary setup and invokes the driver-specific operations for @@ -317,17 +471,21 @@ npages_in_range(unsigned long start, unsigned long end) int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, struct mm_struct *mm, unsigned long start, unsigned long end, - unsigned long timeslice_ms, - void *pgmap_owner) + const struct drm_pagemap_migrate_details *mdetails) { const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops; + struct drm_pagemap *dpagemap = devmem_allocation->dpagemap; + struct dev_pagemap *pagemap = dpagemap->pagemap; struct migrate_vma migrate = { .start = start, .end = end, - .pgmap_owner = pgmap_owner, - .flags = MIGRATE_VMA_SELECT_SYSTEM, + .pgmap_owner = pagemap->owner, + .flags = MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT | + MIGRATE_VMA_SELECT_DEVICE_PRIVATE, }; unsigned long i, npages = npages_in_range(start, end); + unsigned long own_pages = 0, migrated_pages = 0; + struct migrate_range_loc cur, last = {.device = dpagemap->drm->dev, .ops = ops}; struct vm_area_struct *vas; struct drm_pagemap_zdd *zdd = NULL; struct page **pages; @@ -366,11 +524,13 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, pagemap_addr = buf + (2 * sizeof(*migrate.src) * npages); pages = buf + (2 * sizeof(*migrate.src) + sizeof(*pagemap_addr)) * npages; - zdd = drm_pagemap_zdd_alloc(pgmap_owner); + zdd = drm_pagemap_zdd_alloc(dpagemap); if (!zdd) { err = -ENOMEM; - goto err_free; + kvfree(buf); + goto err_out; } + zdd->devmem_allocation = devmem_allocation; /* Owns ref */ migrate.vma = vas; migrate.src = buf; @@ -381,54 +541,134 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, goto err_free; if (!migrate.cpages) { - err = -EFAULT; + /* No pages to migrate. Raced or unknown device pages. */ + err = -EBUSY; goto err_free; } if (migrate.cpages != npages) { + /* + * Some pages to migrate. But we want to migrate all or + * nothing. Raced or unknown device pages. + */ err = -EBUSY; - goto err_finalize; + goto err_aborted_migration; } - err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst); - if (err) - goto err_finalize; + /* Count device-private pages to migrate */ + for (i = 0; i < npages;) { + struct page *src_page = migrate_pfn_to_page(migrate.src[i]); + unsigned long nr_pages = src_page ? 
NR_PAGES(folio_order(page_folio(src_page))) : 1; - err = drm_pagemap_migrate_map_pages(devmem_allocation->dev, pagemap_addr, - migrate.src, npages, DMA_TO_DEVICE); + if (src_page && is_zone_device_page(src_page)) { + if (page_pgmap(src_page) == pagemap) + own_pages += nr_pages; + } + + i += nr_pages; + } + drm_dbg(dpagemap->drm, "Total pages %lu; Own pages: %lu.\n", + npages, own_pages); + if (own_pages == npages) { + err = 0; + drm_dbg(dpagemap->drm, "Migration wasn't necessary.\n"); + goto err_aborted_migration; + } else if (own_pages && !mdetails->can_migrate_same_pagemap) { + err = -EBUSY; + drm_dbg(dpagemap->drm, "Migration aborted due to fragmentation.\n"); + goto err_aborted_migration; + } + + err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst); if (err) goto err_finalize; + own_pages = 0; + for (i = 0; i < npages; ++i) { struct page *page = pfn_to_page(migrate.dst[i]); + struct page *src_page = migrate_pfn_to_page(migrate.src[i]); + cur.start = i; + + pages[i] = NULL; + if (src_page && is_device_private_page(src_page)) { + struct drm_pagemap_zdd *src_zdd = src_page->zone_device_data; - pages[i] = page; + if (page_pgmap(src_page) == pagemap && + !mdetails->can_migrate_same_pagemap) { + migrate.dst[i] = 0; + own_pages++; + continue; + } + if (mdetails->source_peer_migrates) { + cur.dpagemap = src_zdd->dpagemap; + cur.ops = src_zdd->devmem_allocation->ops; + cur.device = cur.dpagemap->drm->dev; + pages[i] = src_page; + } + } + if (!pages[i]) { + cur.dpagemap = NULL; + cur.ops = ops; + cur.device = dpagemap->drm->dev; + pages[i] = page; + } migrate.dst[i] = migrate_pfn(migrate.dst[i]); drm_pagemap_get_devmem_page(page, zdd); - } - err = ops->copy_to_devmem(pages, pagemap_addr, npages); + /* If we switched the migrating drm_pagemap, migrate previous pages now */ + err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst, + pages, pagemap_addr, &last, &cur, + mdetails); + if (err) + goto err_finalize; + } + cur.start = npages; + cur.ops = NULL; /* Force migration */ + err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst, + pages, pagemap_addr, &last, &cur, mdetails); if (err) goto err_finalize; + drm_WARN_ON(dpagemap->drm, !!own_pages); + + dma_fence_put(devmem_allocation->pre_migrate_fence); + devmem_allocation->pre_migrate_fence = NULL; + /* Upon success bind devmem allocation to range and zdd */ devmem_allocation->timeslice_expiration = get_jiffies_64() + - msecs_to_jiffies(timeslice_ms); - zdd->devmem_allocation = devmem_allocation; /* Owns ref */ + msecs_to_jiffies(mdetails->timeslice_ms); err_finalize: if (err) drm_pagemap_migration_unlock_put_pages(npages, migrate.dst); +err_aborted_migration: migrate_vma_pages(&migrate); + + for (i = 0; i < npages;) { + struct page *page = migrate_pfn_to_page(migrate.src[i]); + unsigned long nr_pages = page ? 
NR_PAGES(folio_order(page_folio(page))) : 1; + + if (migrate.src[i] & MIGRATE_PFN_MIGRATE) + migrated_pages += nr_pages; + + i += nr_pages; + } + + if (!err && migrated_pages < npages - own_pages) { + drm_dbg(dpagemap->drm, "Raced while finalizing migration.\n"); + err = -EBUSY; + } + migrate_vma_finalize(&migrate); - drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, pagemap_addr, npages, - DMA_TO_DEVICE); err_free: - if (zdd) - drm_pagemap_zdd_put(zdd); + drm_pagemap_zdd_put(zdd); kvfree(buf); + return err; + err_out: + devmem_allocation->ops->devmem_release(devmem_allocation); return err; } EXPORT_SYMBOL_GPL(drm_pagemap_migrate_to_devmem); @@ -538,6 +778,157 @@ next_put: return -ENOMEM; } +static void drm_pagemap_dev_unhold_work(struct work_struct *work); +static LLIST_HEAD(drm_pagemap_unhold_list); +static DECLARE_WORK(drm_pagemap_work, drm_pagemap_dev_unhold_work); + +/** + * struct drm_pagemap_dev_hold - Struct to aid in drm_device release. + * @link: Link into drm_pagemap_unhold_list for deferred reference releases. + * @drm: drm device to put. + * + * When a struct drm_pagemap is released, we also need to release the + * reference it holds on the drm device. However, typically that needs + * to be done separately from a system-wide workqueue. + * Each time a struct drm_pagemap is initialized + * (or re-initialized if cached) therefore allocate a separate + * drm_pagemap_dev_hold item, from which we put the drm device and + * associated module. + */ +struct drm_pagemap_dev_hold { + struct llist_node link; + struct drm_device *drm; +}; + +static void drm_pagemap_release(struct kref *ref) +{ + struct drm_pagemap *dpagemap = container_of(ref, typeof(*dpagemap), ref); + struct drm_pagemap_dev_hold *dev_hold = dpagemap->dev_hold; + + /* + * We know the pagemap provider is alive at this point, since + * the struct drm_pagemap_dev_hold holds a reference to the + * pagemap provider drm_device and its module. + */ + dpagemap->dev_hold = NULL; + drm_pagemap_shrinker_add(dpagemap); + llist_add(&dev_hold->link, &drm_pagemap_unhold_list); + schedule_work(&drm_pagemap_work); + /* + * Here, either the provider device is still alive, since if called from + * page_free(), the caller is holding a reference on the dev_pagemap, + * or if called from drm_pagemap_put(), the direct caller is still alive. + * This ensures we can't race with THIS module unload. + */ +} + +static void drm_pagemap_dev_unhold_work(struct work_struct *work) +{ + struct llist_node *node = llist_del_all(&drm_pagemap_unhold_list); + struct drm_pagemap_dev_hold *dev_hold, *next; + + /* + * Deferred release of drm_pagemap provider device and module. + * THIS module is kept alive during the release by the + * flush_work() in the drm_pagemap_exit() function. 
+ */ + llist_for_each_entry_safe(dev_hold, next, node, link) { + struct drm_device *drm = dev_hold->drm; + struct module *module = drm->driver->fops->owner; + + drm_dbg(drm, "Releasing reference on provider device and module.\n"); + drm_dev_put(drm); + module_put(module); + kfree(dev_hold); + } +} + +static struct drm_pagemap_dev_hold * +drm_pagemap_dev_hold(struct drm_pagemap *dpagemap) +{ + struct drm_pagemap_dev_hold *dev_hold; + struct drm_device *drm = dpagemap->drm; + + dev_hold = kzalloc(sizeof(*dev_hold), GFP_KERNEL); + if (!dev_hold) + return ERR_PTR(-ENOMEM); + + init_llist_node(&dev_hold->link); + dev_hold->drm = drm; + (void)try_module_get(drm->driver->fops->owner); + drm_dev_get(drm); + + return dev_hold; +} + +/** + * drm_pagemap_reinit() - Reinitialize a drm_pagemap + * @dpagemap: The drm_pagemap to reinitialize + * + * Reinitialize a drm_pagemap, for which drm_pagemap_release + * has already been called. This interface is intended for the + * situation where the driver caches a destroyed drm_pagemap. + * + * Return: 0 on success, negative error code on failure. + */ +int drm_pagemap_reinit(struct drm_pagemap *dpagemap) +{ + dpagemap->dev_hold = drm_pagemap_dev_hold(dpagemap); + if (IS_ERR(dpagemap->dev_hold)) + return PTR_ERR(dpagemap->dev_hold); + + kref_init(&dpagemap->ref); + return 0; +} +EXPORT_SYMBOL(drm_pagemap_reinit); + +/** + * drm_pagemap_init() - Initialize a pre-allocated drm_pagemap + * @dpagemap: The drm_pagemap to initialize. + * @pagemap: The associated dev_pagemap providing the device + * private pages. + * @drm: The drm device. The drm_pagemap holds a reference on the + * drm_device and the module owning the drm_device until + * drm_pagemap_release(). This facilitates drm_pagemap exporting. + * @ops: The drm_pagemap ops. + * + * Initialize and take an initial reference on a drm_pagemap. + * After successful return, use drm_pagemap_put() to destroy. + * + ** Return: 0 on success, negative error code on error. + */ +int drm_pagemap_init(struct drm_pagemap *dpagemap, + struct dev_pagemap *pagemap, + struct drm_device *drm, + const struct drm_pagemap_ops *ops) +{ + kref_init(&dpagemap->ref); + dpagemap->ops = ops; + dpagemap->pagemap = pagemap; + dpagemap->drm = drm; + dpagemap->cache = NULL; + INIT_LIST_HEAD(&dpagemap->shrink_link); + + return drm_pagemap_reinit(dpagemap); +} +EXPORT_SYMBOL(drm_pagemap_init); + +/** + * drm_pagemap_put() - Put a struct drm_pagemap reference + * @dpagemap: Pointer to a struct drm_pagemap object. + * + * Puts a struct drm_pagemap reference and frees the drm_pagemap object + * if the refount reaches zero. 
+ */ +void drm_pagemap_put(struct drm_pagemap *dpagemap) +{ + if (likely(dpagemap)) { + drm_pagemap_shrinker_might_lock(dpagemap); + kref_put(&dpagemap->ref, drm_pagemap_release); + } +} +EXPORT_SYMBOL(drm_pagemap_put); + /** * drm_pagemap_evict_to_ram() - Evict GPU SVM range to RAM * @devmem_allocation: Pointer to the device memory allocation @@ -550,6 +941,7 @@ next_put: int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation) { const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops; + struct drm_pagemap_migrate_details mdetails = {}; unsigned long npages, mpages = 0; struct page **pages; unsigned long *src, *dst; @@ -588,15 +980,17 @@ retry: if (err || !mpages) goto err_finalize; - err = drm_pagemap_migrate_map_pages(devmem_allocation->dev, pagemap_addr, - dst, npages, DMA_FROM_DEVICE); + err = drm_pagemap_migrate_map_pages(devmem_allocation->dev, + devmem_allocation->dpagemap, pagemap_addr, + dst, npages, DMA_FROM_DEVICE, + &mdetails); if (err) goto err_finalize; for (i = 0; i < npages; ++i) pages[i] = migrate_pfn_to_page(src[i]); - err = ops->copy_to_ram(pages, pagemap_addr, npages); + err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL); if (err) goto err_finalize; @@ -605,8 +999,9 @@ err_finalize: drm_pagemap_migration_unlock_put_pages(npages, dst); migrate_device_pages(src, dst, npages); migrate_device_finalize(src, dst, npages); - drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, pagemap_addr, npages, + drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, pagemap_addr, dst, npages, DMA_FROM_DEVICE); + err_free: kvfree(buf); err_out: @@ -627,8 +1022,7 @@ EXPORT_SYMBOL_GPL(drm_pagemap_evict_to_ram); /** * __drm_pagemap_migrate_to_ram() - Migrate GPU SVM range to RAM (internal) * @vas: Pointer to the VM area structure - * @device_private_page_owner: Device private pages owner - * @page: Pointer to the page for fault handling (can be NULL) + * @page: Pointer to the page for fault handling. * @fault_addr: Fault address * @size: Size of migration * @@ -639,18 +1033,18 @@ EXPORT_SYMBOL_GPL(drm_pagemap_evict_to_ram); * Return: 0 on success, negative error code on failure. 
*/ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, - void *device_private_page_owner, struct page *page, unsigned long fault_addr, unsigned long size) { struct migrate_vma migrate = { .vma = vas, - .pgmap_owner = device_private_page_owner, + .pgmap_owner = page_pgmap(page)->owner, .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | MIGRATE_VMA_SELECT_DEVICE_COHERENT, .fault_page = page, }; + struct drm_pagemap_migrate_details mdetails = {}; struct drm_pagemap_zdd *zdd; const struct drm_pagemap_devmem_ops *ops; struct device *dev = NULL; @@ -661,12 +1055,9 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, void *buf; int i, err = 0; - if (page) { - zdd = page->zone_device_data; - if (time_before64(get_jiffies_64(), - zdd->devmem_allocation->timeslice_expiration)) - return 0; - } + zdd = page->zone_device_data; + if (time_before64(get_jiffies_64(), zdd->devmem_allocation->timeslice_expiration)) + return 0; start = ALIGN_DOWN(fault_addr, size); end = ALIGN(fault_addr + 1, size); @@ -702,19 +1093,6 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, if (!migrate.cpages) goto err_free; - if (!page) { - for (i = 0; i < npages; ++i) { - if (!(migrate.src[i] & MIGRATE_PFN_MIGRATE)) - continue; - - page = migrate_pfn_to_page(migrate.src[i]); - break; - } - - if (!page) - goto err_finalize; - } - zdd = page->zone_device_data; ops = zdd->devmem_allocation->ops; dev = zdd->devmem_allocation->dev; @@ -724,15 +1102,15 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, if (err) goto err_finalize; - err = drm_pagemap_migrate_map_pages(dev, pagemap_addr, migrate.dst, npages, - DMA_FROM_DEVICE); + err = drm_pagemap_migrate_map_pages(dev, zdd->dpagemap, pagemap_addr, migrate.dst, npages, + DMA_FROM_DEVICE, &mdetails); if (err) goto err_finalize; for (i = 0; i < npages; ++i) pages[i] = migrate_pfn_to_page(migrate.src[i]); - err = ops->copy_to_ram(pages, pagemap_addr, npages); + err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL); if (err) goto err_finalize; @@ -742,8 +1120,8 @@ err_finalize: migrate_vma_pages(&migrate); migrate_vma_finalize(&migrate); if (dev) - drm_pagemap_migrate_unmap_pages(dev, pagemap_addr, npages, - DMA_FROM_DEVICE); + drm_pagemap_migrate_unmap_pages(dev, pagemap_addr, migrate.dst, + npages, DMA_FROM_DEVICE); err_free: kvfree(buf); err_out: @@ -780,7 +1158,6 @@ static vm_fault_t drm_pagemap_migrate_to_ram(struct vm_fault *vmf) int err; err = __drm_pagemap_migrate_to_ram(vmf->vma, - zdd->device_private_page_owner, vmf->page, vmf->address, zdd->devmem_allocation->size); @@ -813,11 +1190,14 @@ EXPORT_SYMBOL_GPL(drm_pagemap_pagemap_ops_get); * @ops: Pointer to the operations structure for GPU SVM device memory * @dpagemap: The struct drm_pagemap we're allocating from. * @size: Size of device memory allocation + * @pre_migrate_fence: Fence to wait for or pipeline behind before migration starts. + * (May be NULL). 
*/ void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation, struct device *dev, struct mm_struct *mm, const struct drm_pagemap_devmem_ops *ops, - struct drm_pagemap *dpagemap, size_t size) + struct drm_pagemap *dpagemap, size_t size, + struct dma_fence *pre_migrate_fence) { init_completion(&devmem_allocation->detached); devmem_allocation->dev = dev; @@ -825,6 +1205,7 @@ void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation, devmem_allocation->ops = ops; devmem_allocation->dpagemap = dpagemap; devmem_allocation->size = size; + devmem_allocation->pre_migrate_fence = pre_migrate_fence; } EXPORT_SYMBOL_GPL(drm_pagemap_devmem_init); @@ -880,3 +1261,19 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, return err; } EXPORT_SYMBOL(drm_pagemap_populate_mm); + +void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim) +{ + if (dpagemap->ops->destroy) + dpagemap->ops->destroy(dpagemap, is_atomic_or_reclaim); + else + kfree(dpagemap); +} + +static void drm_pagemap_exit(void) +{ + flush_work(&drm_pagemap_work); + if (WARN_ON(!llist_empty(&drm_pagemap_unhold_list))) + disable_work_sync(&drm_pagemap_work); +} +module_exit(drm_pagemap_exit); diff --git a/drivers/gpu/drm/drm_pagemap_util.c b/drivers/gpu/drm/drm_pagemap_util.c new file mode 100644 index 000000000000..c6ae3357c7fb --- /dev/null +++ b/drivers/gpu/drm/drm_pagemap_util.c @@ -0,0 +1,568 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/slab.h> + +#include <drm/drm_drv.h> +#include <drm/drm_managed.h> +#include <drm/drm_pagemap.h> +#include <drm/drm_pagemap_util.h> +#include <drm/drm_print.h> + +/** + * struct drm_pagemap_cache - Lookup structure for pagemaps + * + * Structure to keep track of active (refcount > 1) and inactive + * (refcount == 0) pagemaps. Inactive pagemaps can be made active + * again by waiting for the @queued completion (indicating that the + * pagemap has been put on the @shrinker's list of shrinkable + * pagemaps, and then successfully removing it from @shrinker's + * list. The latter may fail if the shrinker is already in the + * process of freeing the pagemap. A struct drm_pagemap_cache can + * hold a single struct drm_pagemap. + */ +struct drm_pagemap_cache { + /** @lookup_mutex: Mutex making the lookup process atomic */ + struct mutex lookup_mutex; + /** @lock: Lock protecting the @dpagemap pointer */ + spinlock_t lock; + /** @shrinker: Pointer to the shrinker used for this cache. Immutable. */ + struct drm_pagemap_shrinker *shrinker; + /** @dpagemap: Non-refcounted pointer to the drm_pagemap */ + struct drm_pagemap *dpagemap; + /** + * @queued: Signals when an inactive drm_pagemap has been put on + * @shrinker's list. + */ + struct completion queued; +}; + +/** + * struct drm_pagemap_shrinker - Shrinker to remove unused pagemaps + */ +struct drm_pagemap_shrinker { + /** @drm: Pointer to the drm device. */ + struct drm_device *drm; + /** @lock: Spinlock to protect the @dpagemaps list. */ + spinlock_t lock; + /** @dpagemaps: List of unused dpagemaps. */ + struct list_head dpagemaps; + /** @num_dpagemaps: Number of unused dpagemaps in @dpagemaps. */ + atomic_t num_dpagemaps; + /** @shrink: Pointer to the struct shrinker. 
*/ + struct shrinker *shrink; +}; + +static bool drm_pagemap_shrinker_cancel(struct drm_pagemap *dpagemap); + +static void drm_pagemap_cache_fini(void *arg) +{ + struct drm_pagemap_cache *cache = arg; + struct drm_pagemap *dpagemap; + + drm_dbg(cache->shrinker->drm, "Destroying dpagemap cache.\n"); + spin_lock(&cache->lock); + dpagemap = cache->dpagemap; + if (!dpagemap) { + spin_unlock(&cache->lock); + goto out; + } + + if (drm_pagemap_shrinker_cancel(dpagemap)) { + cache->dpagemap = NULL; + spin_unlock(&cache->lock); + drm_pagemap_destroy(dpagemap, false); + } + +out: + mutex_destroy(&cache->lookup_mutex); + kfree(cache); +} + +/** + * drm_pagemap_cache_create_devm() - Create a drm_pagemap_cache + * @shrinker: Pointer to a struct drm_pagemap_shrinker. + * + * Create a device-managed drm_pagemap cache. The cache is automatically + * destroyed on struct device removal, at which point any *inactive* + * drm_pagemap's are destroyed. + * + * Return: Pointer to a struct drm_pagemap_cache on success. Error pointer + * on failure. + */ +struct drm_pagemap_cache *drm_pagemap_cache_create_devm(struct drm_pagemap_shrinker *shrinker) +{ + struct drm_pagemap_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL); + int err; + + if (!cache) + return ERR_PTR(-ENOMEM); + + mutex_init(&cache->lookup_mutex); + spin_lock_init(&cache->lock); + cache->shrinker = shrinker; + init_completion(&cache->queued); + err = devm_add_action_or_reset(shrinker->drm->dev, drm_pagemap_cache_fini, cache); + if (err) + return ERR_PTR(err); + + return cache; +} +EXPORT_SYMBOL(drm_pagemap_cache_create_devm); + +/** + * DOC: Cache lookup + * + * Cache lookup should be done under a locked mutex, so that a + * failed drm_pagemap_get_from_cache() and a following + * drm_pagemap_cache_setpagemap() are carried out as an atomic + * operation WRT other lookups. Otherwise, racing lookups may + * unnecessarily concurrently create pagemaps to fulfill a + * failed lookup. The API provides two functions to perform this lock, + * drm_pagemap_lock_lookup() and drm_pagemap_unlock_lookup() and they + * should be used in the following way: + * + * .. code-block:: c + * + * drm_pagemap_lock_lookup(cache); + * dpagemap = drm_pagemap_get_from_cache(cache); + * if (dpagemap) + * goto out_unlock; + * + * dpagemap = driver_create_new_dpagemap(); + * if (!IS_ERR(dpagemap)) + * drm_pagemap_cache_set_pagemap(cache, dpagemap); + * + * out_unlock: + * drm_pagemap_unlock_lookup(cache); + */ + +/** + * drm_pagemap_cache_lock_lookup() - Lock a drm_pagemap_cache for lookup. + * @cache: The drm_pagemap_cache to lock. + * + * Return: %-EINTR if interrupted while blocking. %0 otherwise. + */ +int drm_pagemap_cache_lock_lookup(struct drm_pagemap_cache *cache) +{ + return mutex_lock_interruptible(&cache->lookup_mutex); +} +EXPORT_SYMBOL(drm_pagemap_cache_lock_lookup); + +/** + * drm_pagemap_cache_unlock_lookup() - Unlock a drm_pagemap_cache after lookup. + * @cache: The drm_pagemap_cache to unlock. + */ +void drm_pagemap_cache_unlock_lookup(struct drm_pagemap_cache *cache) +{ + mutex_unlock(&cache->lookup_mutex); +} +EXPORT_SYMBOL(drm_pagemap_cache_unlock_lookup); + +/** + * drm_pagemap_get_from_cache() - Lookup of drm_pagemaps. + * @cache: The cache used for lookup. + * + * If an active pagemap is present in the cache, it is immediately returned. + * If an inactive pagemap is present, it's removed from the shrinker list and + * an attempt is made to make it active. 
+ * If no pagemap present or the attempt to make it active failed, %NULL is returned + * to indicate to the caller to create a new drm_pagemap and insert it into + * the cache. + * + * Return: A reference-counted pointer to a drm_pagemap if successful. An error + * pointer if an error occurred, or %NULL if no drm_pagemap was found and + * the caller should insert a new one. + */ +struct drm_pagemap *drm_pagemap_get_from_cache(struct drm_pagemap_cache *cache) +{ + struct drm_pagemap *dpagemap; + int err; + + lockdep_assert_held(&cache->lookup_mutex); +retry: + spin_lock(&cache->lock); + dpagemap = cache->dpagemap; + if (drm_pagemap_get_unless_zero(dpagemap)) { + spin_unlock(&cache->lock); + return dpagemap; + } + + if (!dpagemap) { + spin_unlock(&cache->lock); + return NULL; + } + + if (!try_wait_for_completion(&cache->queued)) { + spin_unlock(&cache->lock); + err = wait_for_completion_interruptible(&cache->queued); + if (err) + return ERR_PTR(err); + goto retry; + } + + if (drm_pagemap_shrinker_cancel(dpagemap)) { + cache->dpagemap = NULL; + spin_unlock(&cache->lock); + err = drm_pagemap_reinit(dpagemap); + if (err) { + drm_pagemap_destroy(dpagemap, false); + return ERR_PTR(err); + } + drm_pagemap_cache_set_pagemap(cache, dpagemap); + } else { + cache->dpagemap = NULL; + spin_unlock(&cache->lock); + dpagemap = NULL; + } + + return dpagemap; +} +EXPORT_SYMBOL(drm_pagemap_get_from_cache); + +/** + * drm_pagemap_cache_set_pagemap() - Assign a drm_pagemap to a drm_pagemap_cache + * @cache: The cache to assign the drm_pagemap to. + * @dpagemap: The drm_pagemap to assign. + * + * The function must be called to populate a drm_pagemap_cache only + * after a call to drm_pagemap_get_from_cache() returns NULL. + */ +void drm_pagemap_cache_set_pagemap(struct drm_pagemap_cache *cache, struct drm_pagemap *dpagemap) +{ + struct drm_device *drm = dpagemap->drm; + + lockdep_assert_held(&cache->lookup_mutex); + spin_lock(&cache->lock); + dpagemap->cache = cache; + swap(cache->dpagemap, dpagemap); + reinit_completion(&cache->queued); + spin_unlock(&cache->lock); + drm_WARN_ON(drm, !!dpagemap); +} +EXPORT_SYMBOL(drm_pagemap_cache_set_pagemap); + +/** + * drm_pagemap_get_from_cache_if_active() - Quick lookup of active drm_pagemaps + * @cache: The cache to lookup from. + * + * Function that should be used to lookup a drm_pagemap that is already active. + * (refcount > 0). + * + * Return: A pointer to the cache's drm_pagemap if it's active; %NULL otherwise. + */ +struct drm_pagemap *drm_pagemap_get_from_cache_if_active(struct drm_pagemap_cache *cache) +{ + struct drm_pagemap *dpagemap; + + spin_lock(&cache->lock); + dpagemap = drm_pagemap_get_unless_zero(cache->dpagemap); + spin_unlock(&cache->lock); + + return dpagemap; +} +EXPORT_SYMBOL(drm_pagemap_get_from_cache_if_active); + +static bool drm_pagemap_shrinker_cancel(struct drm_pagemap *dpagemap) +{ + struct drm_pagemap_cache *cache = dpagemap->cache; + struct drm_pagemap_shrinker *shrinker = cache->shrinker; + + spin_lock(&shrinker->lock); + if (list_empty(&dpagemap->shrink_link)) { + spin_unlock(&shrinker->lock); + return false; + } + + list_del_init(&dpagemap->shrink_link); + atomic_dec(&shrinker->num_dpagemaps); + spin_unlock(&shrinker->lock); + return true; +} + +#ifdef CONFIG_PROVE_LOCKING +/** + * drm_pagemap_shrinker_might_lock() - lockdep test for drm_pagemap_shrinker_add() + * @dpagemap: The drm pagemap. + * + * The drm_pagemap_shrinker_add() function performs some locking. 
+ * This function can be called in code-paths that might + * call drm_pagemap_shrinker_add() to detect any lockdep problems early. + */ +void drm_pagemap_shrinker_might_lock(struct drm_pagemap *dpagemap) +{ + int idx; + + if (drm_dev_enter(dpagemap->drm, &idx)) { + struct drm_pagemap_cache *cache = dpagemap->cache; + + if (cache) + might_lock(&cache->shrinker->lock); + + drm_dev_exit(idx); + } +} +#endif + +/** + * drm_pagemap_shrinker_add() - Add a drm_pagemap to the shrinker list or destroy + * @dpagemap: The drm_pagemap. + * + * If @dpagemap is associated with a &struct drm_pagemap_cache AND the + * struct device backing the drm device is still alive, add @dpagemap to + * the &struct drm_pagemap_shrinker list of shrinkable drm_pagemaps. + * + * Otherwise destroy the pagemap directly using drm_pagemap_destroy(). + * + * This is an internal function which is not intended to be exposed to drivers. + */ +void drm_pagemap_shrinker_add(struct drm_pagemap *dpagemap) +{ + struct drm_pagemap_cache *cache; + struct drm_pagemap_shrinker *shrinker; + int idx; + + /* + * The pagemap cache and shrinker are disabled at + * pci device remove time. After that, dpagemaps + * are freed directly. + */ + if (!drm_dev_enter(dpagemap->drm, &idx)) + goto out_no_cache; + + cache = dpagemap->cache; + if (!cache) { + drm_dev_exit(idx); + goto out_no_cache; + } + + shrinker = cache->shrinker; + spin_lock(&shrinker->lock); + list_add_tail(&dpagemap->shrink_link, &shrinker->dpagemaps); + atomic_inc(&shrinker->num_dpagemaps); + spin_unlock(&shrinker->lock); + complete_all(&cache->queued); + drm_dev_exit(idx); + return; + +out_no_cache: + drm_pagemap_destroy(dpagemap, true); +} + +static unsigned long +drm_pagemap_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) +{ + struct drm_pagemap_shrinker *shrinker = shrink->private_data; + unsigned long count = atomic_read(&shrinker->num_dpagemaps); + + return count ? : SHRINK_EMPTY; +} + +static unsigned long +drm_pagemap_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + struct drm_pagemap_shrinker *shrinker = shrink->private_data; + struct drm_pagemap *dpagemap; + struct drm_pagemap_cache *cache; + unsigned long nr_freed = 0; + + sc->nr_scanned = 0; + spin_lock(&shrinker->lock); + do { + dpagemap = list_first_entry_or_null(&shrinker->dpagemaps, typeof(*dpagemap), + shrink_link); + if (!dpagemap) + break; + + atomic_dec(&shrinker->num_dpagemaps); + list_del_init(&dpagemap->shrink_link); + spin_unlock(&shrinker->lock); + + sc->nr_scanned++; + nr_freed++; + + cache = dpagemap->cache; + spin_lock(&cache->lock); + cache->dpagemap = NULL; + spin_unlock(&cache->lock); + + drm_dbg(dpagemap->drm, "Shrinking dpagemap %p.\n", dpagemap); + drm_pagemap_destroy(dpagemap, true); + spin_lock(&shrinker->lock); + } while (sc->nr_scanned < sc->nr_to_scan); + spin_unlock(&shrinker->lock); + + return sc->nr_scanned ? nr_freed : SHRINK_STOP; +} + +static void drm_pagemap_shrinker_fini(void *arg) +{ + struct drm_pagemap_shrinker *shrinker = arg; + + drm_dbg(shrinker->drm, "Destroying dpagemap shrinker.\n"); + drm_WARN_ON(shrinker->drm, !!atomic_read(&shrinker->num_dpagemaps)); + shrinker_free(shrinker->shrink); + kfree(shrinker); +} + +/** + * drm_pagemap_shrinker_create_devm() - Create and register a pagemap shrinker + * @drm: The drm device + * + * Create and register a pagemap shrinker that shrinks unused pagemaps + * and thereby reduces memory footprint. + * The shrinker is drm_device managed and unregisters itself when + * the drm device is removed. 
+ * + * Return: %0 on success, negative error code on failure. + */ +struct drm_pagemap_shrinker *drm_pagemap_shrinker_create_devm(struct drm_device *drm) +{ + struct drm_pagemap_shrinker *shrinker; + struct shrinker *shrink; + int err; + + shrinker = kzalloc(sizeof(*shrinker), GFP_KERNEL); + if (!shrinker) + return ERR_PTR(-ENOMEM); + + shrink = shrinker_alloc(0, "drm-drm_pagemap:%s", drm->unique); + if (!shrink) { + kfree(shrinker); + return ERR_PTR(-ENOMEM); + } + + spin_lock_init(&shrinker->lock); + INIT_LIST_HEAD(&shrinker->dpagemaps); + shrinker->drm = drm; + shrinker->shrink = shrink; + shrink->count_objects = drm_pagemap_shrinker_count; + shrink->scan_objects = drm_pagemap_shrinker_scan; + shrink->private_data = shrinker; + shrinker_register(shrink); + + err = devm_add_action_or_reset(drm->dev, drm_pagemap_shrinker_fini, shrinker); + if (err) + return ERR_PTR(err); + + return shrinker; +} +EXPORT_SYMBOL(drm_pagemap_shrinker_create_devm); + +/** + * struct drm_pagemap_owner - Device interconnect group + * @kref: Reference count. + * + * A struct drm_pagemap_owner identifies a device interconnect group. + */ +struct drm_pagemap_owner { + struct kref kref; +}; + +static void drm_pagemap_owner_release(struct kref *kref) +{ + kfree(container_of(kref, struct drm_pagemap_owner, kref)); +} + +/** + * drm_pagemap_release_owner() - Stop participating in an interconnect group + * @peer: Pointer to the struct drm_pagemap_peer used when joining the group + * + * Stop participating in an interconnect group. This function is typically + * called when a pagemap is removed to indicate that it doesn't need to + * be taken into account. + */ +void drm_pagemap_release_owner(struct drm_pagemap_peer *peer) +{ + struct drm_pagemap_owner_list *owner_list = peer->list; + + if (!owner_list) + return; + + mutex_lock(&owner_list->lock); + list_del(&peer->link); + kref_put(&peer->owner->kref, drm_pagemap_owner_release); + peer->owner = NULL; + mutex_unlock(&owner_list->lock); +} +EXPORT_SYMBOL(drm_pagemap_release_owner); + +/** + * typedef interconnect_fn - Callback function to identify fast interconnects + * @peer1: First endpoint. + * @peer2: Second endpont. + * + * The function returns %true iff @peer1 and @peer2 have a fast interconnect. + * Note that this is symmetrical. The function has no notion of client and provider, + * which may not be sufficient in some cases. However, since the callback is intended + * to guide in providing common pagemap owners, the notion of a common owner to + * indicate fast interconnects would then have to change as well. + * + * Return: %true iff @peer1 and @peer2 have a fast interconnect. Otherwise @false. + */ +typedef bool (*interconnect_fn)(struct drm_pagemap_peer *peer1, struct drm_pagemap_peer *peer2); + +/** + * drm_pagemap_acquire_owner() - Join an interconnect group + * @peer: A struct drm_pagemap_peer keeping track of the device interconnect + * @owner_list: Pointer to the owner_list, keeping track of all interconnects + * @has_interconnect: Callback function to determine whether two peers have a + * fast local interconnect. + * + * Repeatedly calls @has_interconnect for @peer and other peers on @owner_list to + * determine a set of peers for which @peer has a fast interconnect. That set will + * have common &struct drm_pagemap_owner, and upon successful return, @peer::owner + * will point to that struct, holding a reference, and @peer will be registered in + * @owner_list. 
If @peer doesn't have any fast interconnects to other @peers, a + * new unique &struct drm_pagemap_owner will be allocated for it, and that + * may be shared with other peers that, at a later point, are determined to have + * a fast interconnect with @peer. + * + * When @peer no longer participates in an interconnect group, + * drm_pagemap_release_owner() should be called to drop the reference on the + * struct drm_pagemap_owner. + * + * Return: %0 on success, negative error code on failure. + */ +int drm_pagemap_acquire_owner(struct drm_pagemap_peer *peer, + struct drm_pagemap_owner_list *owner_list, + interconnect_fn has_interconnect) +{ + struct drm_pagemap_peer *cur_peer; + struct drm_pagemap_owner *owner = NULL; + bool interconnect = false; + + mutex_lock(&owner_list->lock); + might_alloc(GFP_KERNEL); + list_for_each_entry(cur_peer, &owner_list->peers, link) { + if (cur_peer->owner != owner) { + if (owner && interconnect) + break; + owner = cur_peer->owner; + interconnect = true; + } + if (interconnect && !has_interconnect(peer, cur_peer)) + interconnect = false; + } + + if (!interconnect) { + owner = kmalloc(sizeof(*owner), GFP_KERNEL); + if (!owner) { + mutex_unlock(&owner_list->lock); + return -ENOMEM; + } + kref_init(&owner->kref); + list_add_tail(&peer->link, &owner_list->peers); + } else { + kref_get(&owner->kref); + list_add_tail(&peer->link, &cur_peer->link); + } + peer->owner = owner; + peer->list = owner_list; + mutex_unlock(&owner_list->lock); + + return 0; +} +EXPORT_SYMBOL(drm_pagemap_acquire_owner); diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 7f08b4cd91d6..8dcc85cb8d42 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -98,6 +98,7 @@ xe-y += xe_bb.o \ xe_page_reclaim.o \ xe_pat.o \ xe_pci.o \ + xe_pci_rebar.o \ xe_pcode.o \ xe_pm.o \ xe_preempt_fence.o \ @@ -116,6 +117,7 @@ xe-y += xe_bb.o \ xe_sa.o \ xe_sched_job.o \ xe_shrinker.o \ + xe_soc_remapper.o \ xe_step.o \ xe_survivability_mode.o \ xe_sync.o \ diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 89a4f8c504e6..e33bd622ab44 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -8,6 +8,8 @@ #include <linux/types.h> +#include "abi/guc_scheduler_abi.h" + /** * DOC: GuC KLV * @@ -46,11 +48,18 @@ * Refers to 32 bit architecture version as reported by the HW IP. * This key is supported on MTL+ platforms only. * Requires GuC ABI 1.2+. + * + * _`GUC_KLV_GLOBAL_CFG_GROUP_SCHEDULING_AVAILABLE` : 0x3001 + * Tells the driver whether scheduler groups are enabled or not. + * Requires GuC ABI 1.26+ */ #define GUC_KLV_GLOBAL_CFG_GMD_ID_KEY 0x3000u #define GUC_KLV_GLOBAL_CFG_GMD_ID_LEN 1u +#define GUC_KLV_GLOBAL_CFG_GROUP_SCHEDULING_AVAILABLE_KEY 0x3001u +#define GUC_KLV_GLOBAL_CFG_GROUP_SCHEDULING_AVAILABLE_LEN 1u + /** * DOC: GuC Self Config KLVs * @@ -200,6 +209,20 @@ enum { * :0: adverse events are not counted (default) * :n: sample period in milliseconds * + * _`GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG` : 0x8004 + * This config allows the PF to split the engines across scheduling groups. + * Each group is independently timesliced across VFs, allowing different + * VFs to be active on the HW at the same time. When enabling this feature, + * all engines must be assigned to a group (and only one group), or they + * will be excluded from scheduling after this KLV is sent. 
To enable + * the groups, the driver must provide a masks array with + * GUC_MAX_ENGINE_CLASSES entries for each group, with each mask indicating + * which logical instances of that class belong to the group. Therefore, + * the length of this KLV when enabling groups is + * num_groups * GUC_MAX_ENGINE_CLASSES. To disable the groups, the driver + * must send the KLV without any payload (i.e. len = 0). The maximum + * number of groups is 8. + * * _`GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH` : 0x8D00 * This enum is to reset utilized HW engine after VF Switch (i.e to clean * up Stale HW register left behind by previous VF) @@ -214,6 +237,12 @@ enum { #define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY 0x8002 #define GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_LEN 1u +#define GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG_KEY 0x8004 +#define GUC_KLV_VGT_POLICY_ENGINE_GROUP_MAX_COUNT GUC_MAX_SCHED_GROUPS +#define GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG_MIN_LEN 0 +#define GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG_MAX_LEN \ + (GUC_KLV_VGT_POLICY_ENGINE_GROUP_MAX_COUNT * GUC_MAX_ENGINE_CLASSES) + #define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY 0x8D00 #define GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_LEN 1u @@ -268,6 +297,10 @@ enum { * it to take effect. Such cases might typically happen on a 1PF+1VF * Virtualization config enabled for heavier workloads like AI/ML. * + * If scheduling groups are supported, the provided value is applied to all + * groups (even if they've not yet been enabled). Support for this feature + * is available from GuC 70.53.0. + * * The max value for this KLV is 100 seconds, anything exceeding that * will be clamped to the max. * @@ -290,6 +323,10 @@ enum { * on a 1PF+1VF Virtualization config enabled for heavier workloads like * AI/ML. * + * If scheduling groups are supported, the provided value is applied to all + * groups (even if they've not yet been enabled). Support for this feature + * is available from GuC 70.53.0. + * * The max value for this KLV is 100 seconds, anything exceeding that * will be clamped to the max. * @@ -358,6 +395,26 @@ enum { * groups and cause the latter to be turned off when registered with the * GuC, this config allows the PF to set a threshold for multi-LRC context * registrations by VFs to monitor their behavior. + * + * _`GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM' : 0x8A0E + * This config sets the VFs-execution-quantum for each scheduling group in + * milliseconds. The driver must provide an array of values, with each of + * them matching the respective group index (first value goes to group 0, + * second to group 1, etc). The setting of group values follows the same + * behavior and rules as setting via GUC_KLV_VF_CFG_EXEC_QUANTUM. Note that + * the GuC always sets the EQ for all groups (even the non-enabled ones), + * so if we provide fewer values than the max the GuC will use 0 for the + * remaining groups. This KLV is available starting from GuC 70.53.0. + * + * _`GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT' : 0x8A0F + * This config sets the VFs-preemption-timeout for each scheduling group in + * microseconds. The driver must provide an array of values, with each of + * them matching the respective group index (first value goes to group 0, + * second to group 1, etc). The setting of group values follows the same + * behavior and rules as setting via GUC_KLV_VF_CFG_PREEMPT_TIMEOUT. 
Note + * that the GuC always sets the EQ for all groups (even the non-enabled + * ones), so if we provide fewer values than the max the GuC will use 0 for + * the remaining groups. This KLV is available starting from GuC 70.53.0. */ #define GUC_KLV_VF_CFG_GGTT_START_KEY 0x0001 @@ -419,6 +476,13 @@ enum { #define GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT_KEY 0x8a0d #define GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT_LEN 1u +#define GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_KEY 0x8a0e +#define GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_MIN_LEN 1u +#define GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_MAX_LEN GUC_MAX_SCHED_GROUPS + +#define GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_KEY 0x8a0f +#define GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_MIN_LEN 1u +#define GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_MAX_LEN GUC_MAX_SCHED_GROUPS /* * Workaround keys: */ diff --git a/drivers/gpu/drm/xe/abi/guc_scheduler_abi.h b/drivers/gpu/drm/xe/abi/guc_scheduler_abi.h new file mode 100644 index 000000000000..513b22a87428 --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_scheduler_abi.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _ABI_GUC_SCHEDULER_ABI_H +#define _ABI_GUC_SCHEDULER_ABI_H + +#include <linux/types.h> + +/** + * Generic defines required for registration with and submissions to the GuC + * scheduler. Includes engine class/instance defines and context attributes + * (id, priority, etc) + */ + +/* Engine classes/instances */ +#define GUC_RENDER_CLASS 0 +#define GUC_VIDEO_CLASS 1 +#define GUC_VIDEOENHANCE_CLASS 2 +#define GUC_BLITTER_CLASS 3 +#define GUC_COMPUTE_CLASS 4 +#define GUC_GSC_OTHER_CLASS 5 +#define GUC_LAST_ENGINE_CLASS GUC_GSC_OTHER_CLASS +#define GUC_MAX_ENGINE_CLASSES 16 +#define GUC_MAX_INSTANCES_PER_CLASS 32 + +/* context priority values */ +#define GUC_CLIENT_PRIORITY_KMD_HIGH 0 +#define GUC_CLIENT_PRIORITY_HIGH 1 +#define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 +#define GUC_CLIENT_PRIORITY_NORMAL 3 +#define GUC_CLIENT_PRIORITY_NUM 4 + +/* Context registration */ +#define GUC_ID_MAX 65535 +#define GUC_ID_UNKNOWN 0xffffffff + +#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) +#define CONTEXT_REGISTRATION_FLAG_TYPE GENMASK(2, 1) +#define GUC_CONTEXT_NORMAL 0 +#define GUC_CONTEXT_COMPRESSION_SAVE 1 +#define GUC_CONTEXT_COMPRESSION_RESTORE 2 +#define GUC_CONTEXT_COUNT (GUC_CONTEXT_COMPRESSION_RESTORE + 1) + +/* context enable/disable */ +#define GUC_CONTEXT_DISABLE 0 +#define GUC_CONTEXT_ENABLE 1 + +/* scheduler groups */ +#define GUC_MAX_SCHED_GROUPS 8 + +struct guc_sched_group { + u32 engines[GUC_MAX_ENGINE_CLASSES]; +} __packed; + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h index 0f79c0714454..240d57993ea6 100644 --- a/drivers/gpu/drm/xe/regs/xe_pmt.h +++ b/drivers/gpu/drm/xe/regs/xe_pmt.h @@ -18,9 +18,6 @@ #define BMG_TELEMETRY_BASE_OFFSET 0xE0000 #define BMG_TELEMETRY_OFFSET (SOC_BASE + BMG_TELEMETRY_BASE_OFFSET) -#define SG_REMAP_INDEX1 XE_REG(SOC_BASE + 0x08) -#define SG_REMAP_BITS REG_GENMASK(31, 24) - #define BMG_MODS_RESIDENCY_OFFSET (0x4D0) #define BMG_G2_RESIDENCY_OFFSET (0x530) #define BMG_G6_RESIDENCY_OFFSET (0x538) diff --git a/drivers/gpu/drm/xe/regs/xe_soc_remapper_regs.h b/drivers/gpu/drm/xe/regs/xe_soc_remapper_regs.h new file mode 100644 index 000000000000..be0eb37e73ad --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_soc_remapper_regs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ +#ifndef _XE_SOC_REMAPPER_REGS_H_ 
+#define _XE_SOC_REMAPPER_REGS_H_ + +#include "xe_regs.h" + +#define SG_REMAP_INDEX1 XE_REG(SOC_BASE + 0x08) +#define SG_REMAP_TELEM_MASK REG_GENMASK(31, 24) +#define SG_REMAP_SYSCTRL_MASK REG_GENMASK(23, 16) + +#endif diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 00afc84a8683..e101d290b2a6 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -16,6 +16,7 @@ #include <drm/drm_gem_ttm_helper.h> #include <drm/drm_ioctl.h> #include <drm/drm_managed.h> +#include <drm/drm_pagemap_util.h> #include <drm/drm_print.h> #include <uapi/drm/xe_drm.h> @@ -61,8 +62,10 @@ #include "xe_pxp.h" #include "xe_query.h" #include "xe_shrinker.h" +#include "xe_soc_remapper.h" #include "xe_survivability_mode.h" #include "xe_sriov.h" +#include "xe_svm.h" #include "xe_tile.h" #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_sys_mgr.h" @@ -376,6 +379,20 @@ static const struct file_operations xe_driver_fops = { .fop_flags = FOP_UNSIGNED_OFFSET, }; +/** + * xe_is_xe_file() - Is the file an xe device file? + * @file: The file. + * + * Checks whether the file is opened against + * an xe device. + * + * Return: %true if an xe file, %false if not. + */ +bool xe_is_xe_file(const struct file *file) +{ + return file->f_op == &xe_driver_fops; +} + static struct drm_driver driver = { /* Don't use MTRRs here; the Xserver or userspace app should * deal with them for Intel hardware. @@ -472,6 +489,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, init_rwsem(&xe->usm.lock); + err = xe_pagemap_shrinker_create(xe); + if (err) + goto err; + xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC); if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { @@ -969,6 +990,10 @@ int xe_device_probe(struct xe_device *xe) xe_nvm_init(xe); + err = xe_soc_remapper_init(xe); + if (err) + return err; + err = xe_heci_gsc_init(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 6604b89330d5..3e72fa4609f8 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -200,6 +200,8 @@ void xe_file_put(struct xe_file *xef); int xe_is_injection_active(void); +bool xe_is_xe_file(const struct file *file); + /* * Occasionally it is seen that the G2H worker starts running after a delay of more than * a second even after being queued and activated by the Linux workqueue subsystem. 
This diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index dad355fec50c..a85be9ba175e 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -36,6 +36,8 @@ #define TEST_VM_OPS_ERROR #endif +struct dram_info; +struct drm_pagemap_shrinker; struct intel_display; struct intel_dg_nvm_dev; struct xe_ggtt; @@ -332,6 +334,10 @@ struct xe_device { u8 has_pxp:1; /** @info.has_range_tlb_inval: Has range based TLB invalidations */ u8 has_range_tlb_inval:1; + /** @info.has_soc_remapper_sysctrl: Has SoC remapper system controller */ + u8 has_soc_remapper_sysctrl:1; + /** @info.has_soc_remapper_telem: Has SoC remapper telemetry support */ + u8 has_soc_remapper_telem:1; /** @info.has_sriov: Supports SR-IOV */ u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ @@ -449,6 +455,10 @@ struct xe_device { #define XE_PAGEFAULT_QUEUE_COUNT 4 /** @usm.pf_queue: Page fault queues */ struct xe_pagefault_queue pf_queue[XE_PAGEFAULT_QUEUE_COUNT]; +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) + /** @usm.pagemap_shrinker: Shrinker for unused pagemaps */ + struct drm_pagemap_shrinker *dpagemap_shrinker; +#endif } usm; /** @pinned: pinned BO state */ @@ -572,6 +582,18 @@ struct xe_device { struct mutex lock; } pmt; + /** @soc_remapper: SoC remapper object */ + struct { + /** @soc_remapper.lock: Serialize access to SoC Remapper's index registers */ + spinlock_t lock; + + /** @soc_remapper.set_telem_region: Set telemetry index */ + void (*set_telem_region)(struct xe_device *xe, u32 index); + + /** @soc_remapper.set_sysctrl_region: Set system controller index */ + void (*set_sysctrl_region)(struct xe_device *xe, u32 index); + } soc_remapper; + /** * @pm_callback_task: Track the active task that is running in either * the runtime_suspend or runtime_resume callbacks. 
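[Editor's note] As an aside on the engine-group policy KLV documented in the GuC KLV ABI comments above: the payload is simply num_groups back-to-back guc_sched_group entries (one logical-instance mask per GuC engine class, GUC_MAX_ENGINE_CLASSES dwords per group), and a zero-length KLV disables the groups again. The sketch below is not part of the patch; sketch_fill_group_klv() is a hypothetical helper written purely for illustration. Only GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG_KEY, GUC_KLV_0_KEY/GUC_KLV_0_LEN, GUC_KLV_LEN_MIN, GUC_MAX_ENGINE_CLASSES and struct guc_sched_group are taken from the patch itself (compare pf_push_policy_payload() and __pf_provision_sched_groups() later in xe_gt_sriov_pf_policy.c).

#include <linux/bitfield.h>
#include <linux/string.h>
/* plus the xe GuC KLV and scheduler ABI headers for the key, length and
 * struct guc_sched_group definitions (names assumed from this patch). */

/*
 * Pack the engine-group policy KLV into @klv. @groups points to @num_groups
 * entries, each an array of GUC_MAX_ENGINE_CLASSES logical-instance masks.
 * Returns the number of dwords written (header plus payload).
 */
static u32 sketch_fill_group_klv(u32 *klv, const struct guc_sched_group *groups,
				 u32 num_groups)
{
	u32 len = num_groups * GUC_MAX_ENGINE_CLASSES;

	/* KLV header: key 0x8004, value length in dwords; len == 0 disables groups */
	klv[0] = FIELD_PREP(GUC_KLV_0_KEY, GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG_KEY) |
		 FIELD_PREP(GUC_KLV_0_LEN, len);

	/* each guc_sched_group is exactly GUC_MAX_ENGINE_CLASSES packed dwords */
	if (len)
		memcpy(&klv[1], groups, len * sizeof(u32));

	return GUC_KLV_LEN_MIN + len;
}

Disabling the groups is then the same call with num_groups == 0, which matches the "send the KLV without any payload (i.e. len = 0)" rule in the KLV documentation above.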
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 41023a464480..0b9e074b022f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -17,6 +17,7 @@ #include "xe_dep_scheduler.h" #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_vf.h" #include "xe_hw_engine_class_sysfs.h" #include "xe_hw_engine_group.h" @@ -1108,6 +1109,17 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, return return_mask; } +static bool has_sched_groups(struct xe_gt *gt) +{ + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_sriov_pf_sched_groups_enabled(gt)) + return true; + + if (IS_SRIOV_VF(gt_to_xe(gt)) && xe_gt_sriov_vf_sched_groups_enabled(gt)) + return true; + + return false; +} + int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { @@ -1200,6 +1212,13 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, return -ENOENT; } + /* SRIOV sched groups are not compatible with multi-lrc */ + if (XE_IOCTL_DBG(xe, args->width > 1 && has_sched_groups(hwe->gt))) { + up_read(&vm->lock); + xe_vm_put(vm); + return -EINVAL; + } + q = xe_exec_queue_create(xe, vm, logical_mask, args->width, hwe, flags, args->extensions); diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c index 247e41c1c48d..e7a50b1348b7 100644 --- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c @@ -195,7 +195,8 @@ int main(int argc, const char *argv[]) } } - fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn, prefix, prefix); + fprintf(args[ARGS_CHEADER].f, HEADER, xbasename(args[ARGS_INPUT].fn), + prefix, prefix); ret = parse(args[ARGS_INPUT].f, args[ARGS_CSOURCE].f, args[ARGS_CHEADER].f, prefix); diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 94969ddd9d88..de7e47763411 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -20,7 +20,17 @@ for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \ xe_hw_engine_is_valid((hwe__))) -#define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0) +#define XE_ENGINE_INSTANCES_FROM_MASK(gt, NAME) \ + (((gt)->info.engine_mask & XE_HW_ENGINE_##NAME##_MASK) >> XE_HW_ENGINE_##NAME##0) + +#define RCS_INSTANCES(gt) XE_ENGINE_INSTANCES_FROM_MASK(gt, RCS) +#define VCS_INSTANCES(gt) XE_ENGINE_INSTANCES_FROM_MASK(gt, VCS) +#define VECS_INSTANCES(gt) XE_ENGINE_INSTANCES_FROM_MASK(gt, VECS) +#define CCS_INSTANCES(gt) XE_ENGINE_INSTANCES_FROM_MASK(gt, CCS) +#define GSCCS_INSTANCES(gt) XE_ENGINE_INSTANCES_FROM_MASK(gt, GSCCS) + +/* Our devices have up to 4 media slices */ +#define MAX_MEDIA_SLICES 4 #define GT_VER(gt) ({ \ typeof(gt) gt_ = (gt); \ diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index 50fffc9ebf62..91ac22ef5703 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -17,7 +17,7 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) { u32 mode = CCS_MODE_CSLICE_0_3_MASK; /* disable all by default */ - int num_slices = hweight32(CCS_MASK(gt)); + int num_slices = hweight32(CCS_INSTANCES(gt)); struct xe_device *xe = gt_to_xe(gt); int width, cslice = 0; u32 config = 0; @@ -59,7 +59,7 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) config |= BIT(hwe->instance) << XE_HW_ENGINE_CCS0; /* If a slice is fused off, leave disabled */ - while ((CCS_MASK(gt) & BIT(cslice)) == 0) + while 
((CCS_INSTANCES(gt) & BIT(cslice)) == 0) cslice++; mode &= ~CCS_MODE_CSLICE(cslice, CCS_MODE_CSLICE_MASK); @@ -94,7 +94,7 @@ num_cslices_show(struct device *kdev, { struct xe_gt *gt = kobj_to_gt(&kdev->kobj); - return sysfs_emit(buf, "%u\n", hweight32(CCS_MASK(gt))); + return sysfs_emit(buf, "%u\n", hweight32(CCS_INSTANCES(gt))); } static DEVICE_ATTR_RO(num_cslices); @@ -131,7 +131,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, * Ensure number of engines specified is valid and there is an * exact multiple of engines for slices. */ - num_slices = hweight32(CCS_MASK(gt)); + num_slices = hweight32(CCS_INSTANCES(gt)); if (!num_engines || num_engines > num_slices || num_slices % num_engines) { xe_gt_dbg(gt, "Invalid compute config, %d engines %d slices\n", num_engines, num_slices); diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h index f8779852cf0d..ef3b853f5c8c 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h @@ -17,7 +17,7 @@ int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt); static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt) { /* Check if there are more than one compute engines available */ - return hweight32(CCS_MASK(gt)) > 1; + return hweight32(CCS_INSTANCES(gt)) > 1; } #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index 0714c758b9c1..fb5c9101e275 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -14,6 +14,7 @@ #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_migration.h" +#include "xe_gt_sriov_pf_policy.h" #include "xe_gt_sriov_pf_service.h" #include "xe_gt_sriov_printk.h" #include "xe_guc_submit.h" @@ -123,6 +124,8 @@ int xe_gt_sriov_pf_init(struct xe_gt *gt) if (err) return err; + xe_gt_sriov_pf_policy_init(gt); + err = xe_gt_sriov_pf_migration_init(gt); if (err) return err; @@ -281,3 +284,20 @@ int xe_gt_sriov_pf_wait_ready(struct xe_gt *gt) pf_flush_restart(gt); return 0; } + +/** + * xe_gt_sriov_pf_sched_groups_enabled - Check if multiple scheduler groups are + * enabled + * @gt: the &xe_gt + * + * This function is for PF use only. + * + * Return: true if shed groups were enabled, false otherwise. 
+ */ +bool xe_gt_sriov_pf_sched_groups_enabled(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + return xe_gt_sriov_pf_policy_sched_groups_enabled(gt); +} + diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index e7fde3f9937a..1ccfc7137b98 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -6,6 +6,8 @@ #ifndef _XE_GT_SRIOV_PF_H_ #define _XE_GT_SRIOV_PF_H_ +#include <linux/types.h> + struct xe_gt; #ifdef CONFIG_PCI_IOV @@ -16,6 +18,7 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt); void xe_gt_sriov_pf_restart(struct xe_gt *gt); +bool xe_gt_sriov_pf_sched_groups_enabled(struct xe_gt *gt); #else static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) { @@ -38,6 +41,11 @@ static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt) { } + +static inline bool xe_gt_sriov_pf_sched_groups_enabled(struct xe_gt *gt) +{ + return false; +} #endif #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 6e8507c24986..5a870914b102 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -195,6 +195,25 @@ static int pf_push_vf_cfg_dbs(struct xe_gt *gt, unsigned int vfid, u32 begin, u3 return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs)); } +static int pf_push_vf_grp_cfg_u32(struct xe_gt *gt, unsigned int vfid, + u16 key, const u32 *values, u32 count) +{ + CLASS(xe_guc_buf, buf)(>->uc.guc.buf, GUC_KLV_LEN_MIN + GUC_MAX_SCHED_GROUPS); + u32 *klv; + + xe_gt_assert(gt, count && count <= GUC_MAX_SCHED_GROUPS); + + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + klv = xe_guc_buf_cpu_ptr(buf); + + klv[0] = FIELD_PREP(GUC_KLV_0_KEY, key) | FIELD_PREP(GUC_KLV_0_LEN, count); + memcpy(&klv[1], values, count * sizeof(u32)); + + return pf_push_vf_buf_klvs(gt, vfid, 1, buf, GUC_KLV_LEN_MIN + count); +} + static int pf_push_vf_cfg_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 *exec_quantum) { /* GuC will silently clamp values exceeding max */ @@ -268,6 +287,37 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, return encode_ggtt(cfg, node->base.start, node->base.size, details); } +static u32 encode_config_sched(struct xe_gt *gt, u32 *cfg, u32 n, + const struct xe_gt_sriov_config *config) +{ + int i; + + if (xe_sriov_gt_pf_policy_has_multi_group_modes(gt)) { + BUILD_BUG_ON(ARRAY_SIZE(config->exec_quantum) > + GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_MAX_LEN); + BUILD_BUG_ON(ARRAY_SIZE(config->preempt_timeout) > + GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_MAX_LEN); + + cfg[n++] = PREP_GUC_KLV_CONST(GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_KEY, + ARRAY_SIZE(config->exec_quantum)); + for (i = 0; i < ARRAY_SIZE(config->exec_quantum); i++) + cfg[n++] = config->exec_quantum[i]; + + cfg[n++] = PREP_GUC_KLV_CONST(GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_KEY, + ARRAY_SIZE(config->preempt_timeout)); + for (i = 0; i < ARRAY_SIZE(config->preempt_timeout); i++) + cfg[n++] = config->preempt_timeout[i]; + } else { + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM); + cfg[n++] = config->exec_quantum[0]; + + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_PREEMPT_TIMEOUT); + cfg[n++] = config->preempt_timeout[0]; + } + + return n; +} + /* Return: number of configuration dwords written */ 
static u32 encode_config(struct xe_gt *gt, u32 *cfg, const struct xe_gt_sriov_config *config, bool details) @@ -298,11 +348,7 @@ static u32 encode_config(struct xe_gt *gt, u32 *cfg, const struct xe_gt_sriov_co cfg[n++] = upper_32_bits(xe_bo_size(config->lmem_obj)); } - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM); - cfg[n++] = config->exec_quantum; - - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_PREEMPT_TIMEOUT); - cfg[n++] = config->preempt_timeout; + n = encode_config_sched(gt, cfg, n, config); #define encode_threshold_config(TAG, NAME, VER...) ({ \ if (IF_ARGS(GUC_FIRMWARE_VER_AT_LEAST(>->uc.guc, VER), true, VER)) { \ @@ -976,6 +1022,33 @@ static int pf_config_set_u32_done(struct xe_gt *gt, unsigned int vfid, u32 value return 0; } +static char *to_group_name(const char *what, u8 group, char *buf, size_t size) +{ + snprintf(buf, size, "group%u%s%s", group, what ? " " : "", what ?: ""); + return buf; +} + +static int +pf_groups_cfg_set_u32_done(struct xe_gt *gt, unsigned int vfid, u32 *values, u32 count, + void (*get_actual)(struct xe_gt *, unsigned int, u32 *, u32), + const char *what, const char *(*unit)(u32), int err) +{ + u32 actual[GUC_MAX_SCHED_GROUPS]; + char group_name[32]; + u8 g; + + xe_gt_assert(gt, count <= ARRAY_SIZE(actual)); + + get_actual(gt, vfid, actual, count); + + for (g = 0; g < count; g++) + pf_config_set_u32_done(gt, vfid, values[g], actual[g], + to_group_name(what, g, group_name, sizeof(group_name)), + unit, err); + + return err; +} + /** * xe_gt_sriov_pf_config_set_ctxs - Configure GuC contexts IDs quota for the VF. * @gt: the &xe_gt @@ -1860,12 +1933,15 @@ static int pf_provision_exec_quantum(struct xe_gt *gt, unsigned int vfid, { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); int err; + int i; err = pf_push_vf_cfg_exec_quantum(gt, vfid, &exec_quantum); if (unlikely(err)) return err; - config->exec_quantum = exec_quantum; + for (i = 0; i < ARRAY_SIZE(config->exec_quantum); i++) + config->exec_quantum[i] = exec_quantum; + return 0; } @@ -1873,7 +1949,7 @@ static u32 pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - return config->exec_quantum; + return config->exec_quantum[0]; } /** @@ -1980,6 +2056,88 @@ int xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(struct xe_gt *gt, u32 exe exec_quantum_unit, n, err); } +static int pf_provision_groups_exec_quantums(struct xe_gt *gt, unsigned int vfid, + const u32 *exec_quantums, u32 count) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + int err; + int i; + + err = pf_push_vf_grp_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_KEY, + exec_quantums, count); + if (unlikely(err)) + return err; + + /* + * GuC silently clamps values exceeding the max and zeroes out the + * quantum for groups not in the klv payload + */ + for (i = 0; i < ARRAY_SIZE(config->exec_quantum); i++) { + if (i < count) + config->exec_quantum[i] = min_t(u32, exec_quantums[i], + GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE); + else + config->exec_quantum[i] = 0; + } + + return 0; +} + +static void pf_get_groups_exec_quantums(struct xe_gt *gt, unsigned int vfid, + u32 *exec_quantums, u32 max_count) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + u32 count = min_t(u32, max_count, ARRAY_SIZE(config->exec_quantum)); + + memcpy(exec_quantums, config->exec_quantum, sizeof(u32) * count); +} + +/** + * xe_gt_sriov_pf_config_set_groups_exec_quantums() - Configure PF/VF EQs for sched groups. 
+ * @gt: the &xe_gt + * @vfid: the PF or VF identifier + * @exec_quantums: array of requested EQs in milliseconds (0 is infinity) + * @count: number of entries in the array + * + * This function can only be called on PF. + * It will log the provisioned value or an error in case of the failure. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_groups_exec_quantums(struct xe_gt *gt, unsigned int vfid, + u32 *exec_quantums, u32 count) +{ + int err; + + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + err = pf_provision_groups_exec_quantums(gt, vfid, exec_quantums, count); + + return pf_groups_cfg_set_u32_done(gt, vfid, exec_quantums, count, + pf_get_groups_exec_quantums, + "execution quantum", + exec_quantum_unit, err); +} + +/** + * xe_gt_sriov_pf_config_get_groups_exec_quantums() - Get PF/VF sched groups EQs + * @gt: the &xe_gt + * @vfid: the PF or VF identifier + * @exec_quantums: array in which to store the execution quantums values + * @count: maximum number of entries to store + * + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_config_get_groups_exec_quantums(struct xe_gt *gt, unsigned int vfid, + u32 *exec_quantums, u32 count) +{ + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + xe_gt_assert(gt, count <= GUC_MAX_SCHED_GROUPS); + + pf_get_groups_exec_quantums(gt, vfid, exec_quantums, count); +} + static const char *preempt_timeout_unit(u32 preempt_timeout) { return preempt_timeout ? "us" : "(infinity)"; @@ -1990,12 +2148,14 @@ static int pf_provision_preempt_timeout(struct xe_gt *gt, unsigned int vfid, { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); int err; + int i; err = pf_push_vf_cfg_preempt_timeout(gt, vfid, &preempt_timeout); if (unlikely(err)) return err; - config->preempt_timeout = preempt_timeout; + for (i = 0; i < ARRAY_SIZE(config->preempt_timeout); i++) + config->preempt_timeout[i] = preempt_timeout; return 0; } @@ -2004,7 +2164,7 @@ static u32 pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - return config->preempt_timeout; + return config->preempt_timeout[0]; } /** @@ -2110,6 +2270,89 @@ int xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(struct xe_gt *gt, u32 preempt_timeout_unit, n, err); } +static int pf_provision_groups_preempt_timeouts(struct xe_gt *gt, unsigned int vfid, + const u32 *preempt_timeouts, u32 count) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + int err; + int i; + + err = pf_push_vf_grp_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_KEY, + preempt_timeouts, count); + if (unlikely(err)) + return err; + + /* + * GuC silently clamps values exceeding the max and zeroes out the + * quantum for groups not in the klv payload + */ + for (i = 0; i < ARRAY_SIZE(config->preempt_timeout); i++) { + if (i < count) + config->preempt_timeout[i] = + min_t(u32, preempt_timeouts[i], + GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_MAX_VALUE); + else + config->preempt_timeout[i] = 0; + } + + return 0; +} + +static void pf_get_groups_preempt_timeouts(struct xe_gt *gt, unsigned int vfid, + u32 *preempt_timeouts, u32 max_count) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + u32 count = min_t(u32, max_count, ARRAY_SIZE(config->preempt_timeout)); + + memcpy(preempt_timeouts, config->preempt_timeout, sizeof(u32) * count); +} + +/** + * xe_gt_sriov_pf_config_set_groups_preempt_timeouts() - Configure PF/VF PTs for sched groups. 
+ * @gt: the &xe_gt + * @vfid: the PF or VF identifier + * @preempt_timeouts: array of requested PTs in microseconds (0 is infinity) + * @count: number of entries in the array + * + * This function can only be called on PF. + * It will log the provisioned value or an error in case of the failure. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_groups_preempt_timeouts(struct xe_gt *gt, unsigned int vfid, + u32 *preempt_timeouts, u32 count) +{ + int err; + + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + err = pf_provision_groups_preempt_timeouts(gt, vfid, preempt_timeouts, count); + + return pf_groups_cfg_set_u32_done(gt, vfid, preempt_timeouts, count, + pf_get_groups_preempt_timeouts, + "preempt_timeout", + preempt_timeout_unit, err); +} + +/** + * xe_gt_sriov_pf_config_get_groups_preempt_timeouts() - Get PF/VF sched groups PTs + * @gt: the &xe_gt + * @vfid: the PF or VF identifier + * @preempt_timeouts: array in which to store the preemption timeouts values + * @count: maximum number of entries to store + * + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_config_get_groups_preempt_timeouts(struct xe_gt *gt, unsigned int vfid, + u32 *preempt_timeouts, u32 count) +{ + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + xe_gt_assert(gt, count <= GUC_MAX_SCHED_GROUPS); + + pf_get_groups_preempt_timeouts(gt, vfid, preempt_timeouts, count); +} + static const char *sched_priority_unit(u32 priority) { return priority == GUC_SCHED_PRIORITY_LOW ? "(low)" : @@ -2183,10 +2426,14 @@ u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *config) { + int i; + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); - config->exec_quantum = 0; - config->preempt_timeout = 0; + for (i = 0; i < ARRAY_SIZE(config->exec_quantum); i++) { + config->exec_quantum[i] = 0; + config->preempt_timeout[i] = 0; + } } static int pf_provision_threshold(struct xe_gt *gt, unsigned int vfid, @@ -2548,6 +2795,16 @@ static int pf_restore_vf_config_klv(struct xe_gt *gt, unsigned int vfid, return -EBADMSG; return pf_provision_exec_quantum(gt, vfid, value[0]); + case GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_KEY: + if (len > GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_MAX_LEN) + return -EBADMSG; + return pf_provision_groups_exec_quantums(gt, vfid, value, len); + + case GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_KEY: + if (len > GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_MAX_LEN) + return -EBADMSG; + return pf_provision_groups_preempt_timeouts(gt, vfid, value, len); + case GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY: if (len != GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN) return -EBADMSG; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index 4975730423d7..3c6c8b6655af 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -46,6 +46,11 @@ int xe_gt_sriov_pf_config_set_exec_quantum_locked(struct xe_gt *gt, unsigned int u32 exec_quantum); int xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(struct xe_gt *gt, u32 exec_quantum); +void xe_gt_sriov_pf_config_get_groups_exec_quantums(struct xe_gt *gt, unsigned int vfid, + u32 *exec_quantum, u32 max_count); +int xe_gt_sriov_pf_config_set_groups_exec_quantums(struct xe_gt *gt, unsigned int vfid, + u32 *exec_quantum, u32 count); + u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid); int 
xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout); @@ -55,6 +60,11 @@ int xe_gt_sriov_pf_config_set_preempt_timeout_locked(struct xe_gt *gt, unsigned u32 preempt_timeout); int xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(struct xe_gt *gt, u32 preempt_timeout); +void xe_gt_sriov_pf_config_get_groups_preempt_timeouts(struct xe_gt *gt, unsigned int vfid, + u32 *preempt_timeout, u32 max_count); +int xe_gt_sriov_pf_config_set_groups_preempt_timeouts(struct xe_gt *gt, unsigned int vfid, + u32 *preempt_timeout, u32 count); + u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h index 686c7b3b6d7a..75a48d0fa859 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h @@ -6,6 +6,7 @@ #ifndef _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ #define _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ +#include "abi/guc_scheduler_abi.h" #include "xe_ggtt_types.h" #include "xe_guc_klv_thresholds_set_types.h" @@ -30,9 +31,9 @@ struct xe_gt_sriov_config { /** @begin_db: start index of GuC doorbell ID range. */ u16 begin_db; /** @exec_quantum: execution-quantum in milliseconds. */ - u32 exec_quantum; + u32 exec_quantum[GUC_MAX_SCHED_GROUPS]; /** @preempt_timeout: preemption timeout in microseconds. */ - u32 preempt_timeout; + u32 preempt_timeout[GUC_MAX_SCHED_GROUPS]; /** @sched_priority: scheduling priority. */ u32 sched_priority; /** @thresholds: GuC thresholds for adverse events notifications. */ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index ece9eed5d7c5..47d288c53539 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -160,6 +160,299 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent) * /sys/kernel/debug/dri/BDF/ * ├── sriov * : ├── pf + * : ├── tile0 + * : ├── gt0 + * : ├── sched_groups_mode + * ├── sched_groups_exec_quantums_ms + * ├── sched_groups_preempt_timeout_us + * ├── sched_groups + * : ├── group0 + * : + * : └── groupN + * ├── vf1 + * : ├── tile0 + * : ├── gt0 + * : ├── sched_groups_exec_quantums_ms + * ├── sched_groups_preempt_timeout_us + * : + */ + +static const char *sched_group_mode_to_string(enum xe_sriov_sched_group_modes mode) +{ + switch (mode) { + case XE_SRIOV_SCHED_GROUPS_DISABLED: + return "disabled"; + case XE_SRIOV_SCHED_GROUPS_MEDIA_SLICES: + return "media_slices"; + case XE_SRIOV_SCHED_GROUPS_MODES_COUNT: + /* dummy mode to make the compiler happy */ + break; + } + + return "unknown"; +} + +static int sched_groups_info(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct xe_gt *gt = extract_gt(m->private); + enum xe_sriov_sched_group_modes current_mode = + gt->sriov.pf.policy.guc.sched_groups.current_mode; + enum xe_sriov_sched_group_modes mode; + + for (mode = XE_SRIOV_SCHED_GROUPS_DISABLED; + mode < XE_SRIOV_SCHED_GROUPS_MODES_COUNT; + mode++) { + if (!xe_sriov_gt_pf_policy_has_sched_group_mode(gt, mode)) + continue; + + drm_printf(&p, "%s%s%s%s", + mode == XE_SRIOV_SCHED_GROUPS_DISABLED ? "" : " ", + mode == current_mode ? "[" : "", + sched_group_mode_to_string(mode), + mode == current_mode ? 
"]" : ""); + } + + drm_puts(&p, "\n"); + + return 0; +} + +static int sched_groups_open(struct inode *inode, struct file *file) +{ + return single_open(file, sched_groups_info, inode->i_private); +} + +static ssize_t sched_groups_write(struct file *file, const char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_gt *gt = extract_gt(file_inode(file)->i_private); + enum xe_sriov_sched_group_modes mode; + char name[32]; + int ret; + + if (*pos) + return -ESPIPE; + + if (!size) + return -ENODATA; + + if (size > sizeof(name) - 1) + return -EINVAL; + + ret = simple_write_to_buffer(name, sizeof(name) - 1, pos, ubuf, size); + if (ret < 0) + return ret; + name[ret] = '\0'; + + for (mode = XE_SRIOV_SCHED_GROUPS_DISABLED; + mode < XE_SRIOV_SCHED_GROUPS_MODES_COUNT; + mode++) + if (sysfs_streq(name, sched_group_mode_to_string(mode))) + break; + + if (mode == XE_SRIOV_SCHED_GROUPS_MODES_COUNT) + return -EINVAL; + + guard(xe_pm_runtime)(gt_to_xe(gt)); + ret = xe_gt_sriov_pf_policy_set_sched_groups_mode(gt, mode); + + return ret < 0 ? ret : size; +} + +static const struct file_operations sched_groups_fops = { + .owner = THIS_MODULE, + .open = sched_groups_open, + .read = seq_read, + .write = sched_groups_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int sched_groups_config_show(struct seq_file *m, void *data, + void (*get)(struct xe_gt *, unsigned int, u32 *, u32)) +{ + struct drm_printer p = drm_seq_file_printer(m); + unsigned int vfid = extract_vfid(m->private); + struct xe_gt *gt = extract_gt(m->private); + u32 values[GUC_MAX_SCHED_GROUPS]; + bool first = true; + u8 group; + + get(gt, vfid, values, ARRAY_SIZE(values)); + + for (group = 0; group < ARRAY_SIZE(values); group++) { + drm_printf(&p, "%s%u", first ? "" : ",", values[group]); + + first = false; + } + + drm_puts(&p, "\n"); + + return 0; +} + +static ssize_t sched_groups_config_write(struct file *file, const char __user *ubuf, + size_t size, loff_t *pos, + int (*set)(struct xe_gt *, unsigned int, u32 *, u32)) +{ + struct dentry *parent = file_inode(file)->i_private; + unsigned int vfid = extract_vfid(parent); + struct xe_gt *gt = extract_gt(parent); + u32 values[GUC_MAX_SCHED_GROUPS]; + int *input __free(kfree) = NULL; + u32 count; + int ret; + int i; + + if (*pos) + return -ESPIPE; + + if (!size) + return -ENODATA; + + ret = parse_int_array_user(ubuf, min(size, GUC_MAX_SCHED_GROUPS * sizeof(u32)), &input); + if (ret) + return ret; + + count = input[0]; + if (count > GUC_MAX_SCHED_GROUPS) + return -E2BIG; + + for (i = 0; i < count; i++) { + if (input[i + 1] < 0 || input[i + 1] > S32_MAX) + return -EINVAL; + + values[i] = input[i + 1]; + } + + guard(xe_pm_runtime)(gt_to_xe(gt)); + ret = set(gt, vfid, values, count); + + return ret < 0 ? 
ret : size; +} + +#define DEFINE_SRIOV_GT_GRP_CFG_DEBUGFS_ATTRIBUTE(CONFIG) \ +static int sched_groups_##CONFIG##_show(struct seq_file *m, void *data) \ +{ \ + return sched_groups_config_show(m, data, \ + xe_gt_sriov_pf_config_get_groups_##CONFIG); \ +} \ + \ +static int sched_groups_##CONFIG##_open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, sched_groups_##CONFIG##_show, \ + inode->i_private); \ +} \ + \ +static ssize_t sched_groups_##CONFIG##_write(struct file *file, \ + const char __user *ubuf, \ + size_t size, loff_t *pos) \ +{ \ + return sched_groups_config_write(file, ubuf, size, pos, \ + xe_gt_sriov_pf_config_set_groups_##CONFIG); \ +} \ + \ +static const struct file_operations sched_groups_##CONFIG##_fops = { \ + .owner = THIS_MODULE, \ + .open = sched_groups_##CONFIG##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .write = sched_groups_##CONFIG##_write, \ + .release = single_release, \ +} + +DEFINE_SRIOV_GT_GRP_CFG_DEBUGFS_ATTRIBUTE(exec_quantums); +DEFINE_SRIOV_GT_GRP_CFG_DEBUGFS_ATTRIBUTE(preempt_timeouts); + +static ssize_t sched_group_engines_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct dentry *dent = file_dentry(file); + struct xe_gt *gt = extract_gt(dent->d_parent->d_parent); + struct xe_gt_sriov_scheduler_groups *info = >->sriov.pf.policy.guc.sched_groups; + struct guc_sched_group *groups = info->modes[info->current_mode].groups; + u32 num_groups = info->modes[info->current_mode].num_groups; + unsigned int group = (uintptr_t)extract_priv(dent); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + char engines[128]; + + engines[0] = '\0'; + + if (group < num_groups) { + for_each_hw_engine(hwe, gt, id) { + u8 guc_class = xe_engine_class_to_guc_class(hwe->class); + u32 mask = groups[group].engines[guc_class]; + + if (mask & BIT(hwe->logical_instance)) { + strlcat(engines, hwe->name, sizeof(engines)); + strlcat(engines, " ", sizeof(engines)); + } + } + strlcat(engines, "\n", sizeof(engines)); + } + + return simple_read_from_buffer(buf, count, ppos, engines, strlen(engines)); +} + +static const struct file_operations sched_group_engines_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = sched_group_engines_read, + .llseek = default_llseek, +}; + +static void pf_add_sched_groups(struct xe_gt *gt, struct dentry *parent, unsigned int vfid) +{ + struct dentry *groups; + u8 group; + + xe_gt_assert(gt, gt == extract_gt(parent)); + xe_gt_assert(gt, vfid == extract_vfid(parent)); + + /* + * TODO: we currently call this function before we initialize scheduler + * groups, so at this point in time we don't know if there are any + * valid groups on the GT and we can't selectively register the debugfs + * only if there are any. Therefore, we always register the debugfs + * files if we're on a platform that has support for groups. + * We should rework the flow so that debugfs is registered after the + * policy init, so that we check if there are valid groups before + * adding the debugfs files. + * Similarly, instead of using GUC_MAX_SCHED_GROUPS we could use + * gt->sriov.pf.policy.guc.sched_groups.max_number_of_groups. 
+ */ + if (!xe_sriov_gt_pf_policy_has_sched_groups_support(gt)) + return; + + debugfs_create_file("sched_groups_exec_quantums_ms", 0644, parent, parent, + &sched_groups_exec_quantums_fops); + debugfs_create_file("sched_groups_preempt_timeouts_us", 0644, parent, parent, + &sched_groups_preempt_timeouts_fops); + + if (vfid != PFID) + return; + + debugfs_create_file("sched_groups_mode", 0644, parent, parent, &sched_groups_fops); + + groups = debugfs_create_dir("sched_groups", parent); + if (IS_ERR(groups)) + return; + + for (group = 0; group < GUC_MAX_SCHED_GROUPS; group++) { + char name[10]; + + snprintf(name, sizeof(name), "group%u", group); + debugfs_create_file(name, 0644, groups, (void *)(uintptr_t)group, + &sched_group_engines_fops); + } +} + +/* + * /sys/kernel/debug/dri/BDF/ + * ├── sriov + * : ├── pf * │ ├── tile0 * │ : ├── gt0 * │ : ├── doorbells_spare @@ -518,6 +811,7 @@ static void pf_populate_gt(struct xe_gt *gt, struct dentry *dent, unsigned int v if (vfid) { pf_add_config_attrs(gt, dent, vfid); + pf_add_sched_groups(gt, dent, vfid); debugfs_create_file("control", 0600, dent, NULL, &control_ops); @@ -531,6 +825,7 @@ static void pf_populate_gt(struct xe_gt *gt, struct dentry *dent, unsigned int v } else { pf_add_config_attrs(gt, dent, PFID); pf_add_policy_attrs(gt, dent); + pf_add_sched_groups(gt, dent, PFID); drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), dent, minor); } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c index 4445f660e6d1..c28606ca6623 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c @@ -3,6 +3,8 @@ * Copyright © 2023-2024 Intel Corporation */ +#include <drm/drm_managed.h> + #include "abi/guc_actions_sriov_abi.h" #include "xe_bo.h" @@ -10,9 +12,11 @@ #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_policy.h" #include "xe_gt_sriov_printk.h" +#include "xe_guc.h" #include "xe_guc_buf.h" #include "xe_guc_ct.h" #include "xe_guc_klv_helpers.h" +#include "xe_guc_submit.h" #include "xe_pm.h" /* @@ -94,6 +98,23 @@ static int pf_push_policy_u32(struct xe_gt *gt, u16 key, u32 value) return pf_push_policy_klvs(gt, 1, klv, ARRAY_SIZE(klv)); } +static int pf_push_policy_payload(struct xe_gt *gt, u16 key, void *payload, u32 num_dwords) +{ + CLASS(xe_guc_buf, buf)(>->uc.guc.buf, GUC_KLV_LEN_MIN + num_dwords); + u32 *klv; + + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + klv = xe_guc_buf_cpu_ptr(buf); + + klv[0] = PREP_GUC_KLV(key, num_dwords); + if (num_dwords) + memcpy(&klv[1], payload, num_dwords * sizeof(u32)); + + return pf_push_policy_buf_klvs(gt, 1, buf, GUC_KLV_LEN_MIN + num_dwords); +} + static int pf_update_policy_bool(struct xe_gt *gt, u16 key, bool *policy, bool value) { int err; @@ -351,11 +372,306 @@ u32 xe_gt_sriov_pf_policy_get_sample_period(struct xe_gt *gt) return value; } +static void pf_sched_group_media_slices(struct xe_gt *gt, struct guc_sched_group **groups, + u32 *num_groups) +{ + u8 slice_to_group[MAX_MEDIA_SLICES]; + u32 vecs_mask = VECS_INSTANCES(gt); + u32 gsc_mask = GSCCS_INSTANCES(gt); + u32 vcs_mask = VCS_INSTANCES(gt); + struct guc_sched_group *values; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int group = 0; + int slice; + + xe_gt_assert(gt, xe_gt_is_media_type(gt)); + + /* + * Post-BMG the matching of video engines to slices changes, so for now + * we don't allow this mode on those platforms. 
+ */ + if (gt_to_xe(gt)->info.platform > XE_BATTLEMAGE) + return; + + /* + * On BMG and older platforms a media slice has 2 VCS and a VECS. We + * bundle the GSC with the first slice. + */ + for (slice = 0; slice < MAX_MEDIA_SLICES; slice++) { + if ((vcs_mask & 0x3) || (vecs_mask & 0x1) || (gsc_mask & 0x1)) + slice_to_group[slice] = group++; + + vcs_mask >>= 2; + vecs_mask >>= 1; + gsc_mask >>= 1; + } + + xe_gt_assert(gt, !vcs_mask); + xe_gt_assert(gt, !vecs_mask); + xe_gt_assert(gt, !gsc_mask); + + /* We need at least 2 slices to split them up */ + if (group < 2) + return; + + /* + * If we have more groups than the GuC can support then we don't want to + * expose this specific mode, because the GuC will return an error if we + * try to enable it. + */ + if (group > gt->sriov.pf.policy.guc.sched_groups.max_groups) { + xe_gt_sriov_notice(gt, "media_slice mode has too many groups: %u vs %u\n", + group, gt->sriov.pf.policy.guc.sched_groups.max_groups); + return; + } + + /* The GuC expects an array with a guc_sched_group entry for each group */ + values = drmm_kcalloc(>_to_xe(gt)->drm, group, sizeof(struct guc_sched_group), + GFP_KERNEL); + if (!values) + return; + + for_each_hw_engine(hwe, gt, id) { + u8 guc_class = xe_engine_class_to_guc_class(hwe->class); + + switch (hwe->class) { + case XE_ENGINE_CLASS_VIDEO_DECODE: + slice = hwe->instance / 2; + break; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + slice = hwe->instance; + break; + case XE_ENGINE_CLASS_OTHER: + slice = 0; + break; + default: + xe_gt_assert_msg(gt, false, + "unknown media gt class %u (%s) during EGS setup\n", + hwe->class, hwe->name); + slice = 0; + } + + values[slice_to_group[slice]].engines[guc_class] |= BIT(hwe->logical_instance); + } + + *groups = values; + *num_groups = group; +} + +/** + * xe_sriov_gt_pf_policy_has_sched_groups_support() - Checks whether scheduler + * groups are supported. + * @gt: the &xe_gt + * + * This function can only be called on PF. + * + * Return: true if scheduler groups are supported, false otherwise. + */ +bool xe_sriov_gt_pf_policy_has_sched_groups_support(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + /* + * The GuC supports scheduler groups from v70.53.0, but a fix for it has + * been merged in v70.55.1, so we require the latter. The feature is + * also only enabled on BMG and newer FW. + */ + return GUC_FIRMWARE_VER_AT_LEAST(>->uc.guc, 70, 55, 1) && + gt_to_xe(gt)->info.platform >= XE_BATTLEMAGE; +} + +static void pf_init_sched_groups(struct xe_gt *gt) +{ + enum xe_sriov_sched_group_modes m; + + if (!xe_sriov_gt_pf_policy_has_sched_groups_support(gt)) + return; + + /* + * The GuC interface supports up to 8 groups. However, the GuC only + * fully allocates resources for a subset of groups, based on the number + * of engines and expected usage. The plan is for this to become + * queryable via H2G, but for now GuC FW for all devices supports a + * maximum of 2 groups so we can just hardcode that. 
+ */ + gt->sriov.pf.policy.guc.sched_groups.max_groups = 2; + + for (m = XE_SRIOV_SCHED_GROUPS_DISABLED + 1; m < XE_SRIOV_SCHED_GROUPS_MODES_COUNT; m++) { + u32 *num_groups = >->sriov.pf.policy.guc.sched_groups.modes[m].num_groups; + struct guc_sched_group **groups = + >->sriov.pf.policy.guc.sched_groups.modes[m].groups; + + switch (m) { + case XE_SRIOV_SCHED_GROUPS_MEDIA_SLICES: + /* this mode only has groups on the media GT */ + if (xe_gt_is_media_type(gt)) + pf_sched_group_media_slices(gt, groups, num_groups); + break; + case XE_SRIOV_SCHED_GROUPS_DISABLED: + case XE_SRIOV_SCHED_GROUPS_MODES_COUNT: + /* + * By defining m of type enum xe_sriov_sched_group_modes + * we can get the compiler to automatically flag + * missing cases if new enum entries are added. However, + * to keep the compiler happy we also need to add the + * cases that are excluded from the loop. + */ + xe_gt_assert(gt, false); + break; + } + + xe_gt_assert(gt, *num_groups < GUC_MAX_SCHED_GROUPS); + + if (*num_groups) + gt->sriov.pf.policy.guc.sched_groups.supported_modes |= BIT(m); + } +} + +/** + * xe_sriov_gt_pf_policy_has_multi_group_modes() - check whether the GT supports + * any scheduler modes that have multiple groups + * @gt: the &xe_gt to check + * + * This function can only be called on PF. + * + * Return: true if the GT supports modes with multiple groups, false otherwise. + */ +bool xe_sriov_gt_pf_policy_has_multi_group_modes(struct xe_gt *gt) +{ + return gt->sriov.pf.policy.guc.sched_groups.supported_modes; +} + +/** + * xe_sriov_gt_pf_policy_has_sched_group_mode() - check whether the GT supports + * a specific scheduler group mode + * @gt: the &xe_gt to check + * @mode: the mode to check + * + * This function can only be called on PF. + * + * Return: true if the GT supports the specified mode, false otherwise. + */ +bool xe_sriov_gt_pf_policy_has_sched_group_mode(struct xe_gt *gt, + enum xe_sriov_sched_group_modes mode) +{ + if (mode == XE_SRIOV_SCHED_GROUPS_DISABLED) + return true; + + return gt->sriov.pf.policy.guc.sched_groups.supported_modes & BIT(mode); +} + +static int __pf_provision_sched_groups(struct xe_gt *gt, enum xe_sriov_sched_group_modes mode) +{ + struct guc_sched_group *groups = gt->sriov.pf.policy.guc.sched_groups.modes[mode].groups; + u32 num_groups = gt->sriov.pf.policy.guc.sched_groups.modes[mode].num_groups; + + return pf_push_policy_payload(gt, GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG_KEY, + groups, num_groups * GUC_MAX_ENGINE_CLASSES); +} + +static int pf_provision_sched_groups(struct xe_gt *gt, enum xe_sriov_sched_group_modes mode) +{ + int err; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (!xe_sriov_gt_pf_policy_has_sched_group_mode(gt, mode)) + return -EINVAL; + + /* already in the desired mode */ + if (gt->sriov.pf.policy.guc.sched_groups.current_mode == mode) + return 0; + + /* + * We don't allow changing this with VFs active since it is hard for + * VFs to check. + */ + if (xe_sriov_pf_num_vfs(gt_to_xe(gt))) + return -EBUSY; + + /* + * The GuC silently ignores the setting if any MLRC contexts are + * registered. We expect the admin to make sure that all apps that use + * MLRC are terminated before scheduler groups are enabled, so this + * check is just to make sure that the exec_queue destruction has been + * completed. 
+ */ + if (mode != XE_SRIOV_SCHED_GROUPS_DISABLED && + xe_guc_has_registered_mlrc_queues(>->uc.guc)) { + xe_gt_sriov_notice(gt, "can't enable sched groups with active MLRC queues\n"); + return -EPERM; + } + + err = __pf_provision_sched_groups(gt, mode); + if (err) + return err; + + gt->sriov.pf.policy.guc.sched_groups.current_mode = mode; + + return 0; +} + +static int pf_reprovision_sched_groups(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + /* We only have something to provision if we have possible groups */ + if (!xe_sriov_gt_pf_policy_has_multi_group_modes(gt)) + return 0; + + return __pf_provision_sched_groups(gt, gt->sriov.pf.policy.guc.sched_groups.current_mode); +} + +static void pf_sanitize_sched_groups(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + gt->sriov.pf.policy.guc.sched_groups.current_mode = XE_SRIOV_SCHED_GROUPS_DISABLED; +} + +/** + * xe_gt_sriov_pf_policy_set_sched_groups_mode() - Control the 'sched_groups' policy. + * @gt: the &xe_gt where to apply the policy + * @mode: the sched_group mode to be activated + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_policy_set_sched_groups_mode(struct xe_gt *gt, + enum xe_sriov_sched_group_modes mode) +{ + if (!xe_sriov_gt_pf_policy_has_multi_group_modes(gt)) + return -ENODEV; + + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + return pf_provision_sched_groups(gt, mode); +} + +/** + * xe_gt_sriov_pf_policy_sched_groups_enabled() - check whether the GT has + * multiple scheduler groups enabled + * @gt: the &xe_gt to check + * + * This function can only be called on PF. + * + * Return: true if the GT has multiple groups enabled, false otherwise. + */ +bool xe_gt_sriov_pf_policy_sched_groups_enabled(struct xe_gt *gt) +{ + return gt->sriov.pf.policy.guc.sched_groups.current_mode != XE_SRIOV_SCHED_GROUPS_DISABLED; +} + static void pf_sanitize_guc_policies(struct xe_gt *gt) { pf_sanitize_sched_if_idle(gt); pf_sanitize_reset_engine(gt); pf_sanitize_sample_period(gt); + pf_sanitize_sched_groups(gt); } /** @@ -394,6 +710,7 @@ int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset) err |= pf_reprovision_sched_if_idle(gt); err |= pf_reprovision_reset_engine(gt); err |= pf_reprovision_sample_period(gt); + err |= pf_reprovision_sched_groups(gt); mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); xe_pm_runtime_put(gt_to_xe(gt)); @@ -401,6 +718,18 @@ int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset) return err ? -ENXIO : 0; } +/** + * xe_gt_sriov_pf_policy_init() - Initializes the SW state of the PF policies. + * @gt: the &xe_gt + * + * This function can only be called on PF. This function does not touch the HW, + * but must be called after the engines have been initialized. 
+ */ +void xe_gt_sriov_pf_policy_init(struct xe_gt *gt) +{ + pf_init_sched_groups(gt); +} + static void print_guc_policies(struct drm_printer *p, struct xe_gt_sriov_guc_policies *policy) { drm_printf(p, "%s:\t%s\n", diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h index 2a5dc33dc6d7..bd73aa58f9ca 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h @@ -8,6 +8,8 @@ #include <linux/types.h> +#include "xe_gt_sriov_pf_policy_types.h" + struct drm_printer; struct xe_gt; @@ -17,7 +19,15 @@ int xe_gt_sriov_pf_policy_set_reset_engine(struct xe_gt *gt, bool enable); bool xe_gt_sriov_pf_policy_get_reset_engine(struct xe_gt *gt); int xe_gt_sriov_pf_policy_set_sample_period(struct xe_gt *gt, u32 value); u32 xe_gt_sriov_pf_policy_get_sample_period(struct xe_gt *gt); +bool xe_sriov_gt_pf_policy_has_sched_groups_support(struct xe_gt *gt); +bool xe_sriov_gt_pf_policy_has_multi_group_modes(struct xe_gt *gt); +bool xe_sriov_gt_pf_policy_has_sched_group_mode(struct xe_gt *gt, + enum xe_sriov_sched_group_modes mode); +int xe_gt_sriov_pf_policy_set_sched_groups_mode(struct xe_gt *gt, + enum xe_sriov_sched_group_modes mode); +bool xe_gt_sriov_pf_policy_sched_groups_enabled(struct xe_gt *gt); +void xe_gt_sriov_pf_policy_init(struct xe_gt *gt); void xe_gt_sriov_pf_policy_sanitize(struct xe_gt *gt); int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset); int xe_gt_sriov_pf_policy_print(struct xe_gt *gt, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h index 4de532af135e..97d278190521 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h @@ -8,16 +8,55 @@ #include <linux/types.h> +#include "abi/guc_scheduler_abi.h" + +/** + * enum xe_sriov_sched_group_modes - list of possible scheduler group modes + * @XE_SRIOV_SCHED_GROUPS_DISABLED: no separate groups (i.e., all engines in group 0) + * @XE_SRIOV_SCHED_GROUPS_MEDIA_SLICES: separate groups for each media slice + * @XE_SRIOV_SCHED_GROUPS_MODES_COUNT: number of valid modes + */ +enum xe_sriov_sched_group_modes { + XE_SRIOV_SCHED_GROUPS_DISABLED = 0, + XE_SRIOV_SCHED_GROUPS_MEDIA_SLICES, + XE_SRIOV_SCHED_GROUPS_MODES_COUNT /* must be last */ +}; + +/** + * struct xe_gt_sriov_scheduler_groups - Scheduler groups policy info + * @max_groups: max number of groups supported by the GuC for the platform + * @supported_modes: mask of supported modes + * @current_mode: active scheduler groups mode + * @modes: array of masks and their number for each mode + * @modes.groups: array of engine instance groups in given mode, with each group + * consisting of GUC_MAX_ENGINE_CLASSES engine instances masks. A + * A NULL value indicates that all the engines are in the same + * group for this mode on this GT. + * @modes.num_groups: number of groups in given mode, zero if all the engines + * are in the same group. + */ +struct xe_gt_sriov_scheduler_groups { + u8 max_groups; + u32 supported_modes; + enum xe_sriov_sched_group_modes current_mode; + struct { + struct guc_sched_group *groups; + u32 num_groups; + } modes[XE_SRIOV_SCHED_GROUPS_MODES_COUNT]; +}; + /** * struct xe_gt_sriov_guc_policies - GuC SR-IOV policies. * @sched_if_idle: controls strict scheduling policy. * @reset_engine: controls engines reset on VF switch policy. * @sample_period: adverse events sampling period (in milliseconds). 
+ * @sched_groups: available scheduling group configurations. */ struct xe_gt_sriov_guc_policies { bool sched_if_idle; bool reset_engine; u32 sample_period; + struct xe_gt_sriov_scheduler_groups sched_groups; }; /** diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index b8b391cfc8eb..d91c65dc3496 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -612,6 +612,52 @@ static void vf_cache_gmdid(struct xe_gt *gt) gt->sriov.vf.runtime.gmdid = xe_gt_sriov_vf_gmdid(gt); } +static int vf_query_sched_groups(struct xe_gt *gt) +{ + struct xe_guc *guc = >->uc.guc; + struct xe_uc_fw_version guc_version; + u32 value = 0; + int err; + + xe_gt_sriov_vf_guc_versions(gt, NULL, &guc_version); + + if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 26, 0)) + return 0; + + err = guc_action_query_single_klv32(guc, + GUC_KLV_GLOBAL_CFG_GROUP_SCHEDULING_AVAILABLE_KEY, + &value); + if (unlikely(err)) { + xe_gt_sriov_err(gt, "Failed to obtain sched groups status (%pe)\n", + ERR_PTR(err)); + return err; + } + + /* valid values are 0 (disabled) and 1 (enabled) */ + if (value > 1) { + xe_gt_sriov_err(gt, "Invalid sched groups status %u\n", value); + return -EPROTO; + } + + xe_gt_sriov_dbg(gt, "sched groups %s\n", str_enabled_disabled(value)); + return value; +} + +static int vf_cache_sched_groups_status(struct xe_gt *gt) +{ + int ret; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + ret = vf_query_sched_groups(gt); + if (ret < 0) + return ret; + + gt->sriov.vf.runtime.uses_sched_groups = ret; + + return 0; +} + /** * xe_gt_sriov_vf_query_config - Query SR-IOV config data over MMIO. * @gt: the &xe_gt @@ -641,6 +687,10 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt) if (unlikely(err)) return err; + err = vf_cache_sched_groups_status(gt); + if (unlikely(err)) + return err; + if (has_gmdid(xe)) vf_cache_gmdid(gt); @@ -648,6 +698,23 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt) } /** + * xe_gt_sriov_vf_sched_groups_enabled() - Check if PF has enabled multiple + * scheduler groups + * @gt: the &xe_gt + * + * This function is for VF use only. + * + * Return: true if shed groups were enabled, false otherwise. + */ +bool xe_gt_sriov_vf_sched_groups_enabled(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_gt_assert(gt, gt->sriov.vf.guc_version.major); + + return gt->sriov.vf.runtime.uses_sched_groups; +} + +/** * xe_gt_sriov_vf_guc_ids - VF GuC context IDs configuration. * @gt: the &xe_gt * diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index af40276790fa..7d97189c2d3d 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -30,6 +30,7 @@ bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt); u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt); u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt); +bool xe_gt_sriov_vf_sched_groups_enabled(struct xe_gt *gt); u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg); void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index 510c33116fbd..9a6b5672d569 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -27,6 +27,8 @@ struct xe_gt_sriov_vf_selfconfig { struct xe_gt_sriov_vf_runtime { /** @gmdid: cached value of the GDMID register. 
*/ u32 gmdid; + /** @uses_sched_groups: whether PF enabled sched groups or not. */ + bool uses_sched_groups; /** @regs_size: size of runtime register array. */ u32 regs_size; /** @num_regs: number of runtime registers in the array. */ diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 09ac092c3687..44360437beeb 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -175,7 +175,7 @@ static bool needs_wa_dual_queue(struct xe_gt *gt) * the DUAL_QUEUE_WA on all newer platforms on GTs that have CCS engines * to move management back to the GuC. */ - if (CCS_MASK(gt) && GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) + if (CCS_INSTANCES(gt) && GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) return true; return false; diff --git a/drivers/gpu/drm/xe/xe_guc_capture.h b/drivers/gpu/drm/xe/xe_guc_capture.h index 20a078dc4b85..34d6fdc64f56 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.h +++ b/drivers/gpu/drm/xe/xe_guc_capture.h @@ -8,8 +8,8 @@ #include <linux/types.h> #include "abi/guc_capture_abi.h" +#include "abi/guc_scheduler_abi.h" #include "xe_guc.h" -#include "xe_guc_fwif.h" struct xe_exec_queue; struct xe_guc; diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index c3df9b3f1b4d..dfbf76037b04 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -206,7 +206,9 @@ static void g2h_fence_cancel(struct g2h_fence *g2h_fence) { g2h_fence->cancel = true; g2h_fence->fail = true; - g2h_fence->done = true; + + /* WRITE_ONCE pairs with READ_ONCEs in guc_ct_send_recv. */ + WRITE_ONCE(g2h_fence->done, true); } static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) @@ -527,7 +529,12 @@ static void guc_ct_change_state(struct xe_guc_ct *ct, if (ct->g2h_outstanding) xe_pm_runtime_put(ct_to_xe(ct)); ct->g2h_outstanding = 0; - ct->state = state; + + /* + * WRITE_ONCE pairs with READ_ONCEs in xe_guc_ct_initialized and + * xe_guc_ct_enabled. + */ + WRITE_ONCE(ct->state, state); xe_gt_dbg(gt, "GuC CT communication channel %s\n", state == XE_GUC_CT_STATE_STOPPED ? "stopped" : @@ -1294,10 +1301,13 @@ retry_same_fence: return ret; } - ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); + /* READ_ONCEs pairs with WRITE_ONCEs in parse_g2h_response + * and g2h_fence_cancel. + */ + ret = wait_event_timeout(ct->g2h_fence_wq, READ_ONCE(g2h_fence.done), HZ); if (!ret) { LNL_FLUSH_WORK(&ct->g2h_worker); - if (g2h_fence.done) { + if (READ_ONCE(g2h_fence.done)) { xe_gt_warn(gt, "G2H fence %u, action %04x, done\n", g2h_fence.seqno, action[0]); ret = 1; @@ -1498,7 +1508,8 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); - g2h_fence->done = true; + /* WRITE_ONCE pairs with READ_ONCEs in guc_ct_send_recv. */ + WRITE_ONCE(g2h_fence->done, true); smp_mb(); wake_up_all(&ct->g2h_fence_wq); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 5599939f8fe1..767365a33dee 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -30,12 +30,14 @@ void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb) static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct) { - return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED; + /* READ_ONCE pairs with WRITE_ONCE in guc_ct_change_state. 
*/ + return READ_ONCE(ct->state) != XE_GUC_CT_STATE_NOT_INITIALIZED; } static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct) { - return ct->state == XE_GUC_CT_STATE_ENABLED; + /* READ_ONCE pairs with WRITE_ONCE in guc_ct_change_state. */ + return READ_ONCE(ct->state) == XE_GUC_CT_STATE_ENABLED; } static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct) diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index a04faec477ae..a33ea288b907 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -10,6 +10,7 @@ #include "abi/guc_capture_abi.h" #include "abi/guc_klvs_abi.h" +#include "abi/guc_scheduler_abi.h" #include "xe_hw_engine_types.h" #define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4 @@ -19,59 +20,6 @@ #define G2H_LEN_DW_MULTI_QUEUE_CONTEXT 3 #define G2H_LEN_DW_PAGE_RECLAMATION 3 -#define GUC_ID_MAX 65535 -#define GUC_ID_UNKNOWN 0xffffffff - -#define GUC_CONTEXT_DISABLE 0 -#define GUC_CONTEXT_ENABLE 1 - -#define GUC_CLIENT_PRIORITY_KMD_HIGH 0 -#define GUC_CLIENT_PRIORITY_HIGH 1 -#define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 -#define GUC_CLIENT_PRIORITY_NORMAL 3 -#define GUC_CLIENT_PRIORITY_NUM 4 - -#define GUC_RENDER_ENGINE 0 -#define GUC_VIDEO_ENGINE 1 -#define GUC_BLITTER_ENGINE 2 -#define GUC_VIDEOENHANCE_ENGINE 3 -#define GUC_VIDEO_ENGINE2 4 -#define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1) - -#define GUC_RENDER_CLASS 0 -#define GUC_VIDEO_CLASS 1 -#define GUC_VIDEOENHANCE_CLASS 2 -#define GUC_BLITTER_CLASS 3 -#define GUC_COMPUTE_CLASS 4 -#define GUC_GSC_OTHER_CLASS 5 -#define GUC_LAST_ENGINE_CLASS GUC_GSC_OTHER_CLASS -#define GUC_MAX_ENGINE_CLASSES 16 -#define GUC_MAX_INSTANCES_PER_CLASS 32 - -#define GUC_CONTEXT_NORMAL 0 -#define GUC_CONTEXT_COMPRESSION_SAVE 1 -#define GUC_CONTEXT_COMPRESSION_RESTORE 2 -#define GUC_CONTEXT_COUNT (GUC_CONTEXT_COMPRESSION_RESTORE + 1) - -/* Helper for context registration H2G */ -struct guc_ctxt_registration_info { - u32 flags; - u32 context_idx; - u32 engine_class; - u32 engine_submit_mask; - u32 wq_desc_lo; - u32 wq_desc_hi; - u32 wq_base_lo; - u32 wq_base_hi; - u32 wq_size; - u32 cgp_lo; - u32 cgp_hi; - u32 hwlrca_lo; - u32 hwlrca_hi; -}; -#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) -#define CONTEXT_REGISTRATION_FLAG_TYPE GENMASK(2, 1) - /* 32-bit KLV structure as used by policy updates and others */ struct guc_klv_generic_dw_t { u32 kl; diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c index 146a6eda9e06..97600edda837 100644 --- a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c +++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c @@ -21,11 +21,16 @@ const char *xe_guc_klv_key_to_string(u16 key) { switch (key) { + /* GuC Global Config KLVs */ + case GUC_KLV_GLOBAL_CFG_GROUP_SCHEDULING_AVAILABLE_KEY: + return "group_scheduling_available"; /* VGT POLICY keys */ case GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY: return "sched_if_idle"; case GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY: return "sample_period"; + case GUC_KLV_VGT_POLICY_ENGINE_GROUP_CONFIG_KEY: + return "engine_group_config"; case GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY: return "reset_engine"; /* VF CFG keys */ @@ -51,6 +56,10 @@ const char *xe_guc_klv_key_to_string(u16 key) return "begin_ctx_id"; case GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY: return "sched_priority"; + case GUC_KLV_VF_CFG_ENGINE_GROUP_EXEC_QUANTUM_KEY: + return "sched_groups_exec_quantum"; + case GUC_KLV_VF_CFG_ENGINE_GROUP_PREEMPT_TIMEOUT_KEY: + return "sched_groups_preempt_timeout"; /* VF CFG threshold keys */ #define 
define_threshold_key_to_string_case(TAG, NAME, ...) \ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 0b590271c326..7a4218f76024 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -407,7 +407,7 @@ static int guc_init_global_schedule_policy(struct xe_guc *guc) *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; - if (CCS_MASK(guc_to_gt(guc))) + if (CCS_INSTANCES(guc_to_gt(guc))) emit = emit_render_compute_yield_klv(emit); count = emit - data; @@ -673,6 +673,23 @@ static void set_exec_queue_group_banned(struct xe_exec_queue *q) mutex_unlock(&group->list_lock); } +/* Helper for context registration H2G */ +struct guc_ctxt_registration_info { + u32 flags; + u32 context_idx; + u32 engine_class; + u32 engine_submit_mask; + u32 wq_desc_lo; + u32 wq_desc_hi; + u32 wq_base_lo; + u32 wq_base_hi; + u32 wq_size; + u32 cgp_lo; + u32 cgp_hi; + u32 hwlrca_lo; + u32 hwlrca_hi; +}; + #define parallel_read(xe_, map_, field_) \ xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ field_) @@ -3547,6 +3564,27 @@ void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) } /** + * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues + * registered with the GuC + * @guc: GuC. + * + * Return: true if any MLRC queue is registered with the GuC, false otherwise. + */ +bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + guard(mutex)(&guc->submission_state.lock); + + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + if (q->width > 1) + return true; + + return false; +} + +/** * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all * exec queues registered to given GuC. 
* @guc: the &xe_guc struct instance diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 4d89b2975fe9..b3839a90c142 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -52,6 +52,8 @@ xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapsh void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p); void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type); +bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc); + int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch); #endif diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index f3b66b55acfb..00eef41a9e36 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -35,6 +35,7 @@ #include "xe_sa.h" #include "xe_sched_job.h" #include "xe_sriov_vf_ccs.h" +#include "xe_svm.h" #include "xe_sync.h" #include "xe_trace_bo.h" #include "xe_validation.h" @@ -471,7 +472,8 @@ int xe_migrate_init(struct xe_migrate *m) EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT | EXEC_QUEUE_FLAG_HIGH_PRIORITY | - EXEC_QUEUE_FLAG_MIGRATE, 0); + EXEC_QUEUE_FLAG_MIGRATE | + EXEC_QUEUE_FLAG_LOW_LATENCY, 0); } else { m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, @@ -2048,7 +2050,8 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, u64 pte; xe_tile_assert(m->tile, sram_addr[i].proto == - DRM_INTERCONNECT_SYSTEM); + DRM_INTERCONNECT_SYSTEM || + sram_addr[i].proto == XE_INTERCONNECT_P2P); xe_tile_assert(m->tile, addr); xe_tile_assert(m->tile, PAGE_ALIGNED(addr)); @@ -2113,6 +2116,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, unsigned long sram_offset, struct drm_pagemap_addr *sram_addr, u64 vram_addr, + struct dma_fence *deps, const enum xe_migrate_copy_dir dir) { struct xe_gt *gt = m->tile->primary_gt; @@ -2201,6 +2205,14 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB); + if (deps && !dma_fence_is_signaled(deps)) { + dma_fence_get(deps); + err = drm_sched_job_add_dependency(&job->drm, deps); + if (err) + dma_fence_wait(deps, false); + err = 0; + } + mutex_lock(&m->job_mutex); xe_sched_job_arm(job); fence = dma_fence_get(&job->drm.s_fence->finished); @@ -2226,6 +2238,8 @@ err: * @npages: Number of pages to migrate. * @src_addr: Array of DMA information (source of migrate) * @dst_addr: Device physical address of VRAM (destination of migrate) + * @deps: struct dma_fence representing the dependencies that need + * to be signaled before migration. * * Copy from an array dma addresses to a VRAM device physical address * @@ -2235,10 +2249,11 @@ err: struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, unsigned long npages, struct drm_pagemap_addr *src_addr, - u64 dst_addr) + u64 dst_addr, + struct dma_fence *deps) { return xe_migrate_vram(m, npages * PAGE_SIZE, 0, src_addr, dst_addr, - XE_MIGRATE_COPY_TO_VRAM); + deps, XE_MIGRATE_COPY_TO_VRAM); } /** @@ -2247,6 +2262,8 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, * @npages: Number of pages to migrate. * @src_addr: Device physical address of VRAM (source of migrate) * @dst_addr: Array of DMA information (destination of migrate) + * @deps: struct dma_fence representing the dependencies that need + * to be signaled before migration. 
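+ * May be %NULL when the copy has no dependencies to wait for.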
* * Copy from a VRAM device physical address to an array dma addresses * @@ -2256,10 +2273,11 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, unsigned long npages, u64 src_addr, - struct drm_pagemap_addr *dst_addr) + struct drm_pagemap_addr *dst_addr, + struct dma_fence *deps) { return xe_migrate_vram(m, npages * PAGE_SIZE, 0, dst_addr, src_addr, - XE_MIGRATE_COPY_TO_SRAM); + deps, XE_MIGRATE_COPY_TO_SRAM); } static void xe_migrate_dma_unmap(struct xe_device *xe, @@ -2435,7 +2453,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, __fence = xe_migrate_vram(m, current_bytes, (unsigned long)buf & ~PAGE_MASK, &pagemap_addr[current_page], - vram_addr, write ? + vram_addr, NULL, write ? XE_MIGRATE_COPY_TO_VRAM : XE_MIGRATE_COPY_TO_SRAM); if (IS_ERR(__fence)) { diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 464c05dde1ba..1522afb37dcf 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -116,12 +116,14 @@ int xe_migrate_init(struct xe_migrate *m); struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, unsigned long npages, struct drm_pagemap_addr *src_addr, - u64 dst_addr); + u64 dst_addr, + struct dma_fence *deps); struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, unsigned long npages, u64 src_addr, - struct drm_pagemap_addr *dst_addr); + struct drm_pagemap_addr *dst_addr, + struct dma_fence *deps); struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_bo *src_bo, diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 18d4e6b5c319..a1fdca451ce0 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -27,6 +27,7 @@ #include "xe_macros.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_pci_rebar.h" #include "xe_pci_sriov.h" #include "xe_pci_types.h" #include "xe_pm.h" @@ -370,6 +371,7 @@ static const struct xe_device_desc bmg_desc = { .has_i2c = true, .has_late_bind = true, .has_pre_prod_wa = 1, + .has_soc_remapper_telem = true, .has_sriov = true, .has_mem_copy_instr = true, .max_gt_per_tile = 2, @@ -421,6 +423,8 @@ static const struct xe_device_desc cri_desc = { .has_mbx_power_limits = true, .has_mert = true, .has_pre_prod_wa = 1, + .has_soc_remapper_sysctrl = true, + .has_soc_remapper_telem = true, .has_sriov = true, .max_gt_per_tile = 2, .require_force_probe = true, @@ -692,6 +696,8 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.has_page_reclaim_hw_assist = desc->has_page_reclaim_hw_assist; xe->info.has_pre_prod_wa = desc->has_pre_prod_wa; xe->info.has_pxp = desc->has_pxp; + xe->info.has_soc_remapper_sysctrl = desc->has_soc_remapper_sysctrl; + xe->info.has_soc_remapper_telem = desc->has_soc_remapper_telem; xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) && desc->has_sriov; xe->info.has_mem_copy_instr = desc->has_mem_copy_instr; @@ -1016,7 +1022,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - xe_vram_resize_bar(xe); + xe_pci_rebar_resize(xe); err = xe_device_probe_early(xe); /* diff --git a/drivers/gpu/drm/xe/xe_pci_rebar.c b/drivers/gpu/drm/xe/xe_pci_rebar.c new file mode 100644 index 000000000000..7e2c7079b6ff --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pci_rebar.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/pci.h> +#include <linux/types.h> + +#include "regs/xe_bars.h" 
+#include "xe_device_types.h" +#include "xe_module.h" +#include "xe_pci_rebar.h" +#include "xe_printk.h" + +static void resize_bar(struct xe_device *xe, int resno, resource_size_t size) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int bar_size = pci_rebar_bytes_to_size(size); + int ret; + + ret = pci_resize_resource(pdev, resno, bar_size, 0); + if (ret) { + xe_info(xe, "Failed to resize BAR%d to %dMiB (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n", + resno, 1 << bar_size, ERR_PTR(ret)); + return; + } + + xe_info(xe, "BAR%d resized to %dMiB\n", resno, 1 << bar_size); +} + +/* + * xe_pci_rebar_resize - Resize the LMEMBAR + * @xe: xe device instance + * + * If vram_bar_size module param is set, attempt to set to the requested size + * else set to maximum possible size. + */ +void xe_pci_rebar_resize(struct xe_device *xe) +{ + int force_vram_bar_size = xe_modparam.force_vram_bar_size; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct pci_bus *root = pdev->bus; + resource_size_t current_size; + resource_size_t rebar_size; + struct resource *root_res; + int max_size, i; + u32 pci_cmd; + + /* gather some relevant info */ + current_size = pci_resource_len(pdev, LMEM_BAR); + + if (force_vram_bar_size < 0) + return; + + /* set to a specific size? */ + if (force_vram_bar_size) { + rebar_size = pci_rebar_bytes_to_size(force_vram_bar_size * + (resource_size_t)SZ_1M); + + if (!pci_rebar_size_supported(pdev, LMEM_BAR, rebar_size)) { + xe_info(xe, "Requested size %lluMiB is not supported by rebar sizes: 0x%llx. Leaving default: %lluMiB\n", + (u64)pci_rebar_size_to_bytes(rebar_size) >> ilog2(SZ_1M), + pci_rebar_get_possible_sizes(pdev, LMEM_BAR), + (u64)current_size >> ilog2(SZ_1M)); + return; + } + + rebar_size = pci_rebar_size_to_bytes(rebar_size); + if (rebar_size == current_size) + return; + } else { + max_size = pci_rebar_get_max_size(pdev, LMEM_BAR); + if (max_size < 0) + return; + rebar_size = pci_rebar_size_to_bytes(max_size); + + /* only resize if larger than current */ + if (rebar_size <= current_size) + return; + } + + xe_info(xe, "Attempting to resize bar from %lluMiB -> %lluMiB\n", + (u64)current_size >> ilog2(SZ_1M), (u64)rebar_size >> ilog2(SZ_1M)); + + while (root->parent) + root = root->parent; + + pci_bus_for_each_resource(root, root_res, i) { + if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && + (u64)root_res->start > 0x100000000ul) + break; + } + + if (!root_res) { + xe_info(xe, "Can't resize VRAM BAR - platform support is missing. 
Consider enabling 'Resizable BAR' support in your BIOS\n"); + return; + } + + pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); + pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); + + resize_bar(xe, LMEM_BAR, rebar_size); + + pci_assign_unassigned_bus_resources(pdev->bus); + pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); +} diff --git a/drivers/gpu/drm/xe/xe_pci_rebar.h b/drivers/gpu/drm/xe/xe_pci_rebar.h new file mode 100644 index 000000000000..8677921ac363 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pci_rebar.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PCI_REBAR_H_ +#define _XE_PCI_REBAR_H_ + +struct xe_device; + +void xe_pci_rebar_resize(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 3bb51d155951..5f20f56571d1 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -53,6 +53,8 @@ struct xe_device_desc { u8 has_pre_prod_wa:1; u8 has_page_reclaim_hw_assist:1; u8 has_pxp:1; + u8 has_soc_remapper_sysctrl:1; + u8 has_soc_remapper_telem:1; u8 has_sriov:1; u8 needs_scratch:1; u8 skip_guc_pc:1; diff --git a/drivers/gpu/drm/xe/xe_soc_remapper.c b/drivers/gpu/drm/xe/xe_soc_remapper.c new file mode 100644 index 000000000000..1c391d719196 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_soc_remapper.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "regs/xe_soc_remapper_regs.h" +#include "xe_mmio.h" +#include "xe_soc_remapper.h" + +static void xe_soc_remapper_set_region(struct xe_device *xe, struct xe_reg reg, + u32 mask, u32 val) +{ + guard(spinlock_irqsave)(&xe->soc_remapper.lock); + xe_mmio_rmw32(xe_root_tile_mmio(xe), reg, mask, val); +} + +static void xe_soc_remapper_set_telem_region(struct xe_device *xe, u32 index) +{ + xe_soc_remapper_set_region(xe, SG_REMAP_INDEX1, SG_REMAP_TELEM_MASK, + REG_FIELD_PREP(SG_REMAP_TELEM_MASK, index)); +} + +static void xe_soc_remapper_set_sysctrl_region(struct xe_device *xe, u32 index) +{ + xe_soc_remapper_set_region(xe, SG_REMAP_INDEX1, SG_REMAP_SYSCTRL_MASK, + REG_FIELD_PREP(SG_REMAP_SYSCTRL_MASK, index)); +} + +/** + * xe_soc_remapper_init() - Initialize SoC remapper + * @xe: Pointer to xe device. + * + * Initialize SoC remapper. 
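+ * Sets up the remapper lock and installs the telemetry and system-controller region callbacks that the platform supports.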
+ * + * Return: 0 on success, error code on failure + */ +int xe_soc_remapper_init(struct xe_device *xe) +{ + bool has_soc_remapper = xe->info.has_soc_remapper_telem || + xe->info.has_soc_remapper_sysctrl; + + if (has_soc_remapper) + spin_lock_init(&xe->soc_remapper.lock); + + if (xe->info.has_soc_remapper_telem) + xe->soc_remapper.set_telem_region = xe_soc_remapper_set_telem_region; + + if (xe->info.has_soc_remapper_sysctrl) + xe->soc_remapper.set_sysctrl_region = xe_soc_remapper_set_sysctrl_region; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_soc_remapper.h b/drivers/gpu/drm/xe/xe_soc_remapper.h new file mode 100644 index 000000000000..1060ad0f5abc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_soc_remapper.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SOC_REMAPPER_H_ +#define _XE_SOC_REMAPPER_H_ + +#include "xe_device_types.h" + +int xe_soc_remapper_init(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 93550c7c84ac..fa2ee2c08f31 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -3,7 +3,12 @@ * Copyright © 2024 Intel Corporation */ +#include <linux/pci-p2pdma.h> + #include <drm/drm_drv.h> +#include <drm/drm_managed.h> +#include <drm/drm_pagemap.h> +#include <drm/drm_pagemap_util.h> #include "xe_bo.h" #include "xe_exec_queue_types.h" @@ -19,6 +24,38 @@ #include "xe_vm_types.h" #include "xe_vram_types.h" +/* Identifies subclasses of struct drm_pagemap_peer */ +#define XE_PEER_PAGEMAP ((void *)0ul) +#define XE_PEER_VM ((void *)1ul) + +/** + * DOC: drm_pagemap reference-counting in xe: + * + * In addition to the drm_pagemap internal reference counting by its zone + * device data, the xe driver holds the following long-time references: + * + * - struct xe_pagemap: + * The xe_pagemap struct derives from struct drm_pagemap and uses its + * reference count. + * - SVM-enabled VMs: + * SVM-enabled VMs look up and keeps a reference to all xe_pagemaps on + * the same device. + * - VMAs: + * vmas keep a reference on the drm_pagemap indicated by a gpu_madvise() + * call. + * + * In addition, all drm_pagemap or xe_pagemap pointers where lifetime cannot + * be guaranteed by a vma reference under the vm lock should keep a reference. + * That includes the range->pages.dpagemap pointer. + */ + +static int xe_svm_get_pagemaps(struct xe_vm *vm); + +void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem) +{ + return force_smem ? 
NULL : vm->svm.peer.owner; +} + static bool xe_svm_range_in_vram(struct xe_svm_range *range) { /* @@ -276,10 +313,14 @@ static int __xe_svm_garbage_collector(struct xe_vm *vm, static void xe_vma_set_default_attributes(struct xe_vma *vma) { - vma->attr.preferred_loc.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE; - vma->attr.preferred_loc.migration_policy = DRM_XE_MIGRATE_ALL_PAGES; - vma->attr.pat_index = vma->attr.default_pat_index; - vma->attr.atomic_access = DRM_XE_ATOMIC_UNDEFINED; + struct xe_vma_mem_attr default_attr = { + .preferred_loc.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, + .preferred_loc.migration_policy = DRM_XE_MIGRATE_ALL_PAGES, + .pat_index = vma->attr.default_pat_index, + .atomic_access = DRM_XE_ATOMIC_UNDEFINED, + }; + + xe_vma_mem_attr_copy(&vma->attr, &default_attr); } static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 start, u64 end) @@ -390,27 +431,47 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w) #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) -static struct xe_vram_region *page_to_vr(struct page *page) +static struct xe_vram_region *xe_pagemap_to_vr(struct xe_pagemap *xpagemap) { - return container_of(page_pgmap(page), struct xe_vram_region, pagemap); + return xpagemap->vr; } -static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr, - struct page *page) +static struct xe_pagemap *xe_page_to_pagemap(struct page *page) { - u64 dpa; + return container_of(page_pgmap(page), struct xe_pagemap, pagemap); +} + +static struct xe_vram_region *xe_page_to_vr(struct page *page) +{ + return xe_pagemap_to_vr(xe_page_to_pagemap(page)); +} + +static u64 xe_page_to_dpa(struct page *page) +{ + struct xe_pagemap *xpagemap = xe_page_to_pagemap(page); + struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap); + u64 hpa_base = xpagemap->hpa_base; u64 pfn = page_to_pfn(page); u64 offset; + u64 dpa; xe_assert(vr->xe, is_device_private_page(page)); - xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= vr->hpa_base); + xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= hpa_base); - offset = (pfn << PAGE_SHIFT) - vr->hpa_base; + offset = (pfn << PAGE_SHIFT) - hpa_base; dpa = vr->dpa_base + offset; return dpa; } +static u64 xe_page_to_pcie(struct page *page) +{ + struct xe_pagemap *xpagemap = xe_page_to_pagemap(page); + struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap); + + return xe_page_to_dpa(page) - vr->dpa_base + vr->io_start; +} + enum xe_svm_copy_dir { XE_SVM_COPY_TO_VRAM, XE_SVM_COPY_TO_SRAM, @@ -472,7 +533,8 @@ static void xe_svm_copy_us_stats_incr(struct xe_gt *gt, static int xe_svm_copy(struct page **pages, struct drm_pagemap_addr *pagemap_addr, - unsigned long npages, const enum xe_svm_copy_dir dir) + unsigned long npages, const enum xe_svm_copy_dir dir, + struct dma_fence *pre_migrate_fence) { struct xe_vram_region *vr = NULL; struct xe_gt *gt = NULL; @@ -509,11 +571,11 @@ static int xe_svm_copy(struct page **pages, continue; if (!vr && spage) { - vr = page_to_vr(spage); + vr = xe_page_to_vr(spage); gt = xe_migrate_exec_queue(vr->migrate)->gt; xe = vr->xe; } - XE_WARN_ON(spage && page_to_vr(spage) != vr); + XE_WARN_ON(spage && xe_page_to_vr(spage) != vr); /* * CPU page and device page valid, capture physical address on @@ -521,7 +583,7 @@ static int xe_svm_copy(struct page **pages, * device pages. 
*/ if (pagemap_addr[i].addr && spage) { - __vram_addr = xe_vram_region_page_to_dpa(vr, spage); + __vram_addr = xe_page_to_dpa(spage); if (vram_addr == XE_VRAM_ADDR_INVALID) { vram_addr = __vram_addr; pos = i; @@ -561,7 +623,8 @@ static int xe_svm_copy(struct page **pages, __fence = xe_migrate_from_vram(vr->migrate, i - pos + incr, vram_addr, - &pagemap_addr[pos]); + &pagemap_addr[pos], + pre_migrate_fence); } else { vm_dbg(&xe->drm, "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld", @@ -570,13 +633,14 @@ static int xe_svm_copy(struct page **pages, __fence = xe_migrate_to_vram(vr->migrate, i - pos + incr, &pagemap_addr[pos], - vram_addr); + vram_addr, + pre_migrate_fence); } if (IS_ERR(__fence)) { err = PTR_ERR(__fence); goto err_out; } - + pre_migrate_fence = NULL; dma_fence_put(fence); fence = __fence; } @@ -599,20 +663,22 @@ static int xe_svm_copy(struct page **pages, vram_addr, (u64)pagemap_addr[pos].addr, 1); __fence = xe_migrate_from_vram(vr->migrate, 1, vram_addr, - &pagemap_addr[pos]); + &pagemap_addr[pos], + pre_migrate_fence); } else { vm_dbg(&xe->drm, "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d", (u64)pagemap_addr[pos].addr, vram_addr, 1); __fence = xe_migrate_to_vram(vr->migrate, 1, &pagemap_addr[pos], - vram_addr); + vram_addr, + pre_migrate_fence); } if (IS_ERR(__fence)) { err = PTR_ERR(__fence); goto err_out; } - + pre_migrate_fence = NULL; dma_fence_put(fence); fence = __fence; } @@ -625,6 +691,8 @@ err_out: dma_fence_wait(fence, false); dma_fence_put(fence); } + if (pre_migrate_fence) + dma_fence_wait(pre_migrate_fence, false); /* * XXX: We can't derive the GT here (or anywhere in this functions, but @@ -641,16 +709,20 @@ err_out: static int xe_svm_copy_to_devmem(struct page **pages, struct drm_pagemap_addr *pagemap_addr, - unsigned long npages) + unsigned long npages, + struct dma_fence *pre_migrate_fence) { - return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_VRAM); + return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_VRAM, + pre_migrate_fence); } static int xe_svm_copy_to_ram(struct page **pages, struct drm_pagemap_addr *pagemap_addr, - unsigned long npages) + unsigned long npages, + struct dma_fence *pre_migrate_fence) { - return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_SRAM); + return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_SRAM, + pre_migrate_fence); } static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation) @@ -663,13 +735,16 @@ static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation) struct xe_bo *bo = to_xe_bo(devmem_allocation); struct xe_device *xe = xe_bo_device(bo); + dma_fence_put(devmem_allocation->pre_migrate_fence); xe_bo_put_async(bo); xe_pm_runtime_put(xe); } -static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset) +static u64 block_offset_to_pfn(struct drm_pagemap *dpagemap, u64 offset) { - return PHYS_PFN(offset + vr->hpa_base); + struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap); + + return PHYS_PFN(offset + xpagemap->hpa_base); } static struct drm_buddy *vram_to_buddy(struct xe_vram_region *vram) @@ -689,7 +764,8 @@ static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocati list_for_each_entry(block, blocks, link) { struct xe_vram_region *vr = block->private; struct drm_buddy *buddy = vram_to_buddy(vr); - u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block)); + u64 block_pfn = block_offset_to_pfn(devmem_allocation->dpagemap, + drm_buddy_block_offset(block)); int i; 
for (i = 0; i < drm_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i) @@ -706,6 +782,11 @@ static const struct drm_pagemap_devmem_ops dpagemap_devmem_ops = { .copy_to_ram = xe_svm_copy_to_ram, }; +#else +static int xe_svm_get_pagemaps(struct xe_vm *vm) +{ + return 0; +} #endif static const struct drm_gpusvm_ops gpusvm_ops = { @@ -720,6 +801,48 @@ static const unsigned long fault_chunk_sizes[] = { SZ_4K, }; +static void xe_pagemap_put(struct xe_pagemap *xpagemap) +{ + drm_pagemap_put(&xpagemap->dpagemap); +} + +static void xe_svm_put_pagemaps(struct xe_vm *vm) +{ + struct xe_device *xe = vm->xe; + struct xe_tile *tile; + int id; + + for_each_tile(tile, xe, id) { + struct xe_pagemap *xpagemap = vm->svm.pagemaps[id]; + + if (xpagemap) + xe_pagemap_put(xpagemap); + vm->svm.pagemaps[id] = NULL; + } +} + +static struct device *xe_peer_to_dev(struct drm_pagemap_peer *peer) +{ + if (peer->private == XE_PEER_PAGEMAP) + return container_of(peer, struct xe_pagemap, peer)->dpagemap.drm->dev; + + return container_of(peer, struct xe_vm, svm.peer)->xe->drm.dev; +} + +static bool xe_has_interconnect(struct drm_pagemap_peer *peer1, + struct drm_pagemap_peer *peer2) +{ + struct device *dev1 = xe_peer_to_dev(peer1); + struct device *dev2 = xe_peer_to_dev(peer2); + + if (dev1 == dev2) + return true; + + return pci_p2pdma_distance(to_pci_dev(dev1), dev2, true) >= 0; +} + +static DRM_PAGEMAP_OWNER_LIST_DEFINE(xe_owner_list); + /** * xe_svm_init() - SVM initialize * @vm: The VM. @@ -738,12 +861,30 @@ int xe_svm_init(struct xe_vm *vm) INIT_WORK(&vm->svm.garbage_collector.work, xe_svm_garbage_collector_work_func); + vm->svm.peer.private = XE_PEER_VM; + err = drm_pagemap_acquire_owner(&vm->svm.peer, &xe_owner_list, + xe_has_interconnect); + if (err) + return err; + + err = xe_svm_get_pagemaps(vm); + if (err) { + drm_pagemap_release_owner(&vm->svm.peer); + return err; + } + err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm, current->mm, 0, vm->size, xe_modparam.svm_notifier_size * SZ_1M, &gpusvm_ops, fault_chunk_sizes, ARRAY_SIZE(fault_chunk_sizes)); drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock); + + if (err) { + xe_svm_put_pagemaps(vm); + drm_pagemap_release_owner(&vm->svm.peer); + return err; + } } else { err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", &vm->xe->drm, NULL, 0, 0, 0, NULL, @@ -763,6 +904,8 @@ void xe_svm_close(struct xe_vm *vm) { xe_assert(vm->xe, xe_vm_is_closed(vm)); flush_work(&vm->svm.garbage_collector.work); + xe_svm_put_pagemaps(vm); + drm_pagemap_release_owner(&vm->svm.peer); } /** @@ -778,13 +921,34 @@ void xe_svm_fini(struct xe_vm *vm) drm_gpusvm_fini(&vm->svm.gpusvm); } +static bool xe_svm_range_has_pagemap_locked(const struct xe_svm_range *range, + const struct drm_pagemap *dpagemap) +{ + return range->base.pages.dpagemap == dpagemap; +} + +static bool xe_svm_range_has_pagemap(struct xe_svm_range *range, + const struct drm_pagemap *dpagemap) +{ + struct xe_vm *vm = range_to_vm(&range->base); + bool ret; + + xe_svm_notifier_lock(vm); + ret = xe_svm_range_has_pagemap_locked(range, dpagemap); + xe_svm_notifier_unlock(vm); + + return ret; +} + static bool xe_svm_range_is_valid(struct xe_svm_range *range, struct xe_tile *tile, - bool devmem_only) + bool devmem_only, + const struct drm_pagemap *dpagemap) + { return (xe_vm_has_valid_gpu_mapping(tile, range->tile_present, range->tile_invalidated) && - (!devmem_only || xe_svm_range_in_vram(range))); + (!devmem_only || xe_svm_range_has_pagemap(range, dpagemap))); } /** xe_svm_range_migrate_to_smem() - Move range pages 
from VRAM to SMEM @@ -805,7 +969,8 @@ void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range) * @vm: xe_vm pointer * @range: Pointer to the SVM range structure * @tile_mask: Mask representing the tiles to be checked - * @devmem_preferred : if true range needs to be in devmem + * @dpagemap: if !%NULL, the range is expected to be present + * in device memory identified by this parameter. * * The xe_svm_range_validate() function checks if a range is * valid and located in the desired memory region. @@ -814,14 +979,15 @@ void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range) */ bool xe_svm_range_validate(struct xe_vm *vm, struct xe_svm_range *range, - u8 tile_mask, bool devmem_preferred) + u8 tile_mask, const struct drm_pagemap *dpagemap) { bool ret; xe_svm_notifier_lock(vm); - ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask && - (devmem_preferred == range->base.pages.flags.has_devmem_pages); + ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask; + if (dpagemap) + ret = ret && xe_svm_range_has_pagemap_locked(range, dpagemap); xe_svm_notifier_unlock(vm); @@ -856,7 +1022,13 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, struct mm_struct *mm, unsigned long timeslice_ms) { - struct xe_vram_region *vr = container_of(dpagemap, typeof(*vr), dpagemap); + struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap); + struct drm_pagemap_migrate_details mdetails = { + .timeslice_ms = timeslice_ms, + .source_peer_migrates = 1, + }; + struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap); + struct dma_fence *pre_migrate_fence = NULL; struct xe_device *xe = vr->xe; struct device *dev = xe->drm.dev; struct drm_buddy_block *block; @@ -883,8 +1055,20 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, break; } + /* Ensure that any clearing or async eviction will complete before migration. */ + if (!dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL)) { + err = dma_resv_get_singleton(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, + &pre_migrate_fence); + if (err) + dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + else if (pre_migrate_fence) + dma_fence_enable_sw_signaling(pre_migrate_fence); + } + drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, - &dpagemap_devmem_ops, dpagemap, end - start); + &dpagemap_devmem_ops, dpagemap, end - start, + pre_migrate_fence); blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; list_for_each_entry(block, blocks, link) @@ -894,11 +1078,9 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, /* Ensure the device has a pm ref while there are device pages active. */ xe_pm_runtime_get_noresume(xe); + /* Consumes the devmem allocation ref. */ err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm, - start, end, timeslice_ms, - xe_svm_devm_owner(xe)); - if (err) - xe_svm_devmem_release(&bo->devmem_allocation); + start, end, &mdetails); xe_bo_unlock(bo); xe_bo_put(bo); } @@ -921,23 +1103,23 @@ static bool supports_4K_migration(struct xe_device *xe) * xe_svm_range_needs_migrate_to_vram() - SVM range needs migrate to VRAM or not * @range: SVM range for which migration needs to be decided * @vma: vma which has range - * @preferred_region_is_vram: preferred region for range is vram + * @dpagemap: The preferred struct drm_pagemap to migrate to. 
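+ * A %NULL @dpagemap means system memory is preferred and no migration is needed.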
* * Return: True for range needing migration and migration is supported else false */ bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, - bool preferred_region_is_vram) + const struct drm_pagemap *dpagemap) { struct xe_vm *vm = range_to_vm(&range->base); u64 range_size = xe_svm_range_size(range); - if (!range->base.pages.flags.migrate_devmem || !preferred_region_is_vram) + if (!range->base.pages.flags.migrate_devmem || !dpagemap) return false; xe_assert(vm->xe, IS_DGFX(vm->xe)); - if (xe_svm_range_in_vram(range)) { - drm_info(&vm->xe->drm, "Range is already in VRAM\n"); + if (xe_svm_range_has_pagemap(range, dpagemap)) { + drm_dbg(&vm->xe->drm, "Range is already in VRAM\n"); return false; } @@ -1011,7 +1193,6 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, .devmem_only = need_vram && devmem_possible, .timeslice_ms = need_vram && devmem_possible ? vm->xe->atomic_svm_timeslice_ms : 0, - .device_private_page_owner = xe_svm_devm_owner(vm->xe), }; struct xe_validation_ctx vctx; struct drm_exec exec; @@ -1034,9 +1215,9 @@ retry: if (err) return err; - dpagemap = xe_vma_resolve_pagemap(vma, tile); - if (!dpagemap && !ctx.devmem_only) - ctx.device_private_page_owner = NULL; + dpagemap = ctx.devmem_only ? xe_tile_local_pagemap(tile) : + xe_vma_resolve_pagemap(vma, tile); + ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap); range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx); if (IS_ERR(range)) @@ -1049,7 +1230,7 @@ retry: goto out; } - if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) { + if (xe_svm_range_is_valid(range, tile, ctx.devmem_only, dpagemap)) { xe_svm_range_valid_fault_count_stats_incr(gt, range); range_debug(range, "PAGE FAULT - VALID"); goto out; @@ -1058,16 +1239,11 @@ retry: range_debug(range, "PAGE FAULT"); if (--migrate_try_count >= 0 && - xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) { + xe_svm_range_needs_migrate_to_vram(range, vma, dpagemap)) { ktime_t migrate_start = xe_gt_stats_ktime_get(); - /* TODO : For multi-device dpagemap will be used to find the - * remote tile and remote device. Will need to modify - * xe_svm_alloc_vram to use dpagemap for future multi-device - * support. - */ xe_svm_range_migrate_count_stats_incr(gt, range); - err = xe_svm_alloc_vram(tile, range, &ctx); + err = xe_svm_alloc_vram(range, &ctx, dpagemap); xe_svm_range_migrate_us_stats_incr(gt, range, migrate_start); ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (err) { @@ -1118,6 +1294,10 @@ get_pages: if (err) { range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT"); goto out; + } else if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) { + drm_dbg(&vm->xe->drm, "After page collect data location is %sin \"%s\".\n", + xe_svm_range_has_pagemap(range, dpagemap) ? "" : "NOT ", + dpagemap ? 
dpagemap->drm->unique : "System."); } xe_svm_range_get_pages_us_stats_incr(gt, range, get_pages_start); @@ -1365,11 +1545,6 @@ u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end) #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) -static struct drm_pagemap *tile_local_pagemap(struct xe_tile *tile) -{ - return &tile->mem.vram->dpagemap; -} - /** * xe_vma_resolve_pagemap - Resolve the appropriate DRM pagemap for a VMA * @vma: Pointer to the xe_vma structure containing memory attributes @@ -1389,40 +1564,87 @@ static struct drm_pagemap *tile_local_pagemap(struct xe_tile *tile) */ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile) { - s32 fd = (s32)vma->attr.preferred_loc.devmem_fd; + struct drm_pagemap *dpagemap = vma->attr.preferred_loc.dpagemap; + s32 fd; + + if (dpagemap) + return dpagemap; + + fd = (s32)vma->attr.preferred_loc.devmem_fd; if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM) return NULL; if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE) - return IS_DGFX(tile_to_xe(tile)) ? tile_local_pagemap(tile) : NULL; + return IS_DGFX(tile_to_xe(tile)) ? xe_tile_local_pagemap(tile) : NULL; - /* TODO: Support multi-device with drm_pagemap_from_fd(fd) */ return NULL; } /** * xe_svm_alloc_vram()- Allocate device memory pages for range, * migrating existing data. - * @tile: tile to allocate vram from * @range: SVM range * @ctx: DRM GPU SVM context + * @dpagemap: The struct drm_pagemap representing the memory to allocate. * * Return: 0 on success, error code on failure. */ -int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, - const struct drm_gpusvm_ctx *ctx) +int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx, + struct drm_pagemap *dpagemap) { - struct drm_pagemap *dpagemap; + static DECLARE_RWSEM(driver_migrate_lock); + struct xe_vm *vm = range_to_vm(&range->base); + enum drm_gpusvm_scan_result migration_state; + struct xe_device *xe = vm->xe; + int err, retries = 1; + bool write_locked = false; - xe_assert(tile_to_xe(tile), range->base.pages.flags.migrate_devmem); + xe_assert(range_to_vm(&range->base)->xe, range->base.pages.flags.migrate_devmem); range_debug(range, "ALLOCATE VRAM"); - dpagemap = tile_local_pagemap(tile); - return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range), - xe_svm_range_end(range), - range->base.gpusvm->mm, - ctx->timeslice_ms); + migration_state = drm_gpusvm_scan_mm(&range->base, + xe_svm_private_page_owner(vm, false), + dpagemap->pagemap); + + if (migration_state == DRM_GPUSVM_SCAN_EQUAL) { + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) + drm_dbg(dpagemap->drm, "Already migrated!\n"); + return 0; + } + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) + drm_dbg(&xe->drm, "Request migration to device memory on \"%s\".\n", + dpagemap->drm->unique); + + err = down_read_interruptible(&driver_migrate_lock); + if (err) + return err; + do { + err = drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range), + xe_svm_range_end(range), + range->base.gpusvm->mm, + ctx->timeslice_ms); + + if (err == -EBUSY && retries) { + if (!write_locked) { + int lock_err; + + up_read(&driver_migrate_lock); + lock_err = down_write_killable(&driver_migrate_lock); + if (lock_err) + return lock_err; + write_locked = true; + } + drm_gpusvm_range_evict(range->base.gpusvm, &range->base); + } + } while (err == -EBUSY && retries--); + if (write_locked) + up_write(&driver_migrate_lock); + else + up_read(&driver_migrate_lock); + + return err; } static struct drm_pagemap_addr @@ -1432,92 +1654,363 @@ 
xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, unsigned int order, enum dma_data_direction dir) { - struct device *pgmap_dev = dpagemap->dev; + struct device *pgmap_dev = dpagemap->drm->dev; enum drm_interconnect_protocol prot; dma_addr_t addr; if (pgmap_dev == dev) { - addr = xe_vram_region_page_to_dpa(page_to_vr(page), page); + addr = xe_page_to_dpa(page); prot = XE_INTERCONNECT_VRAM; } else { - addr = DMA_MAPPING_ERROR; - prot = 0; + addr = dma_map_resource(dev, + xe_page_to_pcie(page), + PAGE_SIZE << order, dir, + DMA_ATTR_SKIP_CPU_SYNC); + prot = XE_INTERCONNECT_P2P; } return drm_pagemap_addr_encode(addr, prot, order, dir); } +static void xe_drm_pagemap_device_unmap(struct drm_pagemap *dpagemap, + struct device *dev, + struct drm_pagemap_addr addr) +{ + if (addr.proto != XE_INTERCONNECT_P2P) + return; + + dma_unmap_resource(dev, addr.addr, PAGE_SIZE << addr.order, + addr.dir, DMA_ATTR_SKIP_CPU_SYNC); +} + +static void xe_pagemap_destroy_work(struct work_struct *work) +{ + struct xe_pagemap *xpagemap = container_of(work, typeof(*xpagemap), destroy_work); + struct dev_pagemap *pagemap = &xpagemap->pagemap; + struct drm_device *drm = xpagemap->dpagemap.drm; + int idx; + + /* + * Only unmap / release if devm_ release hasn't run yet. + * Otherwise the devm_ callbacks have already released, or + * will do shortly. + */ + if (drm_dev_enter(drm, &idx)) { + devm_memunmap_pages(drm->dev, pagemap); + devm_release_mem_region(drm->dev, pagemap->range.start, + pagemap->range.end - pagemap->range.start + 1); + drm_dev_exit(idx); + } + + drm_pagemap_release_owner(&xpagemap->peer); + kfree(xpagemap); +} + +static void xe_pagemap_destroy(struct drm_pagemap *dpagemap, bool from_atomic_or_reclaim) +{ + struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap); + struct xe_device *xe = to_xe_device(dpagemap->drm); + + if (from_atomic_or_reclaim) + queue_work(xe->destroy_wq, &xpagemap->destroy_work); + else + xe_pagemap_destroy_work(&xpagemap->destroy_work); +} + static const struct drm_pagemap_ops xe_drm_pagemap_ops = { .device_map = xe_drm_pagemap_device_map, + .device_unmap = xe_drm_pagemap_device_unmap, .populate_mm = xe_drm_pagemap_populate_mm, + .destroy = xe_pagemap_destroy, }; /** - * xe_devm_add: Remap and provide memmap backing for device memory - * @tile: tile that the memory region belongs to - * @vr: vram memory region to remap + * xe_pagemap_create() - Create a struct xe_pagemap object + * @xe: The xe device. + * @vr: Back-pointer to the struct xe_vram_region. * - * This remap device memory to host physical address space and create - * struct page to back device memory + * Allocate and initialize a struct xe_pagemap. On successful + * return, drm_pagemap_put() on the embedded struct drm_pagemap + * should be used to unreference. * - * Return: 0 on success standard error code otherwise + * Return: Pointer to a struct xe_pagemap if successful. Error pointer + * on failure. 
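+ * The new pagemap is not inserted into any cache; xe_pagemap_find_or_create() takes care of that.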
*/ -int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) +static struct xe_pagemap *xe_pagemap_create(struct xe_device *xe, struct xe_vram_region *vr) { - struct xe_device *xe = tile_to_xe(tile); - struct device *dev = &to_pci_dev(xe->drm.dev)->dev; + struct device *dev = xe->drm.dev; + struct xe_pagemap *xpagemap; + struct dev_pagemap *pagemap; + struct drm_pagemap *dpagemap; struct resource *res; void *addr; - int ret; + int err; + + xpagemap = kzalloc(sizeof(*xpagemap), GFP_KERNEL); + if (!xpagemap) + return ERR_PTR(-ENOMEM); + + pagemap = &xpagemap->pagemap; + dpagemap = &xpagemap->dpagemap; + INIT_WORK(&xpagemap->destroy_work, xe_pagemap_destroy_work); + xpagemap->vr = vr; + xpagemap->peer.private = XE_PEER_PAGEMAP; + + err = drm_pagemap_init(dpagemap, pagemap, &xe->drm, &xe_drm_pagemap_ops); + if (err) + goto out_no_dpagemap; res = devm_request_free_mem_region(dev, &iomem_resource, vr->usable_size); if (IS_ERR(res)) { - ret = PTR_ERR(res); - return ret; + err = PTR_ERR(res); + goto out_err; + } + + err = drm_pagemap_acquire_owner(&xpagemap->peer, &xe_owner_list, + xe_has_interconnect); + if (err) + goto out_no_owner; + + pagemap->type = MEMORY_DEVICE_PRIVATE; + pagemap->range.start = res->start; + pagemap->range.end = res->end; + pagemap->nr_range = 1; + pagemap->owner = xpagemap->peer.owner; + pagemap->ops = drm_pagemap_pagemap_ops_get(); + addr = devm_memremap_pages(dev, pagemap); + if (IS_ERR(addr)) { + err = PTR_ERR(addr); + goto out_no_pages; + } + xpagemap->hpa_base = res->start; + return xpagemap; + +out_no_pages: + drm_pagemap_release_owner(&xpagemap->peer); +out_no_owner: + devm_release_mem_region(dev, res->start, res->end - res->start + 1); +out_err: + drm_pagemap_put(dpagemap); + return ERR_PTR(err); + +out_no_dpagemap: + kfree(xpagemap); + return ERR_PTR(err); +} + +/** + * xe_pagemap_find_or_create() - Find or create a struct xe_pagemap + * @xe: The xe device. + * @cache: The struct xe_pagemap_cache. + * @vr: The VRAM region. + * + * Check if there is an already used xe_pagemap for this tile, and in that case, + * return it. + * If not, check if there is a cached xe_pagemap for this tile, and in that case, + * cancel its destruction, re-initialize it and return it. + * Finally if there is no cached or already used pagemap, create one and + * register it in the tile's pagemap cache. + * + * Note that this function is typically called from within an IOCTL, and waits are + * therefore carried out interruptible if possible. + * + * Return: A pointer to a struct xe_pagemap if successful, Error pointer on failure. 
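+ * The returned xe_pagemap holds a drm_pagemap reference that the caller must drop with drm_pagemap_put().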
+ */ +static struct xe_pagemap * +xe_pagemap_find_or_create(struct xe_device *xe, struct drm_pagemap_cache *cache, + struct xe_vram_region *vr) +{ + struct drm_pagemap *dpagemap; + struct xe_pagemap *xpagemap; + int err; + + err = drm_pagemap_cache_lock_lookup(cache); + if (err) + return ERR_PTR(err); + + dpagemap = drm_pagemap_get_from_cache(cache); + if (IS_ERR(dpagemap)) { + xpagemap = ERR_CAST(dpagemap); + } else if (!dpagemap) { + xpagemap = xe_pagemap_create(xe, vr); + if (IS_ERR(xpagemap)) + goto out_unlock; + drm_pagemap_cache_set_pagemap(cache, &xpagemap->dpagemap); + } else { + xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap); } - vr->pagemap.type = MEMORY_DEVICE_PRIVATE; - vr->pagemap.range.start = res->start; - vr->pagemap.range.end = res->end; - vr->pagemap.nr_range = 1; - vr->pagemap.ops = drm_pagemap_pagemap_ops_get(); - vr->pagemap.owner = xe_svm_devm_owner(xe); - addr = devm_memremap_pages(dev, &vr->pagemap); +out_unlock: + drm_pagemap_cache_unlock_lookup(cache); + return xpagemap; +} + +static int xe_svm_get_pagemaps(struct xe_vm *vm) +{ + struct xe_device *xe = vm->xe; + struct xe_pagemap *xpagemap; + struct xe_tile *tile; + int id; + + for_each_tile(tile, xe, id) { + struct xe_vram_region *vr; - vr->dpagemap.dev = dev; - vr->dpagemap.ops = &xe_drm_pagemap_ops; + if (!((BIT(id) << 1) & xe->info.mem_region_mask)) + continue; - if (IS_ERR(addr)) { - devm_release_mem_region(dev, res->start, resource_size(res)); - ret = PTR_ERR(addr); - drm_err(&xe->drm, "Failed to remap tile %d memory, errno %pe\n", - tile->id, ERR_PTR(ret)); - return ret; + vr = xe_tile_to_vr(tile); + xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr); + if (IS_ERR(xpagemap)) + break; + vm->svm.pagemaps[id] = xpagemap; + } + + if (IS_ERR(xpagemap)) { + xe_svm_put_pagemaps(vm); + return PTR_ERR(xpagemap); } - vr->hpa_base = res->start; - drm_dbg(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n", - tile->id, vr->io_start, vr->io_start + vr->usable_size, res); return 0; } + +/** + * xe_pagemap_shrinker_create() - Create a drm_pagemap shrinker + * @xe: The xe device + * + * Create a drm_pagemap shrinker and register with the xe device. + * + * Return: %0 on success, negative error code on failure. + */ +int xe_pagemap_shrinker_create(struct xe_device *xe) +{ + xe->usm.dpagemap_shrinker = drm_pagemap_shrinker_create_devm(&xe->drm); + return PTR_ERR_OR_ZERO(xe->usm.dpagemap_shrinker); +} + +/** + * xe_pagemap_cache_create() - Create a drm_pagemap cache + * @tile: The tile to register the cache with + * + * Create a drm_pagemap cache and register with the tile. + * + * Return: %0 on success, negative error code on failure. 
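+ * On devices without dedicated VRAM this is a no-op and %0 is returned.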
+ */ +int xe_pagemap_cache_create(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + + if (IS_DGFX(xe)) { + struct drm_pagemap_cache *cache = + drm_pagemap_cache_create_devm(xe->usm.dpagemap_shrinker); + + if (IS_ERR(cache)) + return PTR_ERR(cache); + + tile->mem.vram->dpagemap_cache = cache; + } + + return 0; +} + +static struct drm_pagemap *xe_devmem_open(struct xe_device *xe, u32 region_instance) +{ + u32 tile_id = region_instance - 1; + struct xe_pagemap *xpagemap; + struct xe_vram_region *vr; + + if (tile_id >= xe->info.tile_count) + return ERR_PTR(-ENOENT); + + if (!((BIT(tile_id) << 1) & xe->info.mem_region_mask)) + return ERR_PTR(-ENOENT); + + vr = xe_tile_to_vr(&xe->tiles[tile_id]); + + /* Returns a reference-counted embedded struct drm_pagemap */ + xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr); + if (IS_ERR(xpagemap)) + return ERR_CAST(xpagemap); + + return &xpagemap->dpagemap; +} + +/** + * xe_drm_pagemap_from_fd() - Return a drm_pagemap pointer from a + * (file_descriptor, region_instance) pair. + * @fd: An fd opened against an xe device. + * @region_instance: The region instance representing the device memory + * on the opened xe device. + * + * Opens a struct drm_pagemap pointer on the + * indicated device and region_instance. + * + * Return: A reference-counted struct drm_pagemap pointer on success, + * negative error pointer on failure. + */ +struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance) +{ + struct drm_pagemap *dpagemap; + struct file *file; + struct drm_file *fpriv; + struct drm_device *drm; + int idx; + + if (fd <= 0) + return ERR_PTR(-EINVAL); + + file = fget(fd); + if (!file) + return ERR_PTR(-ENOENT); + + if (!xe_is_xe_file(file)) { + dpagemap = ERR_PTR(-ENOENT); + goto out; + } + + fpriv = file->private_data; + drm = fpriv->minor->dev; + if (!drm_dev_enter(drm, &idx)) { + dpagemap = ERR_PTR(-ENODEV); + goto out; + } + + dpagemap = xe_devmem_open(to_xe_device(drm), region_instance); + drm_dev_exit(idx); +out: + fput(file); + return dpagemap; +} + #else -int xe_svm_alloc_vram(struct xe_tile *tile, - struct xe_svm_range *range, - const struct drm_gpusvm_ctx *ctx) + +int xe_pagemap_shrinker_create(struct xe_device *xe) { - return -EOPNOTSUPP; + return 0; } -int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) +int xe_pagemap_cache_create(struct xe_tile *tile) { return 0; } +int xe_svm_alloc_vram(struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx, + struct drm_pagemap *dpagemap) +{ + return -EOPNOTSUPP; +} + struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile) { return NULL; } + +struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance) +{ + return ERR_PTR(-ENOENT); +} + #endif /** diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index fa757dd07954..b7b8eeacf196 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -6,29 +6,22 @@ #ifndef _XE_SVM_H_ #define _XE_SVM_H_ -struct xe_device; - -/** - * xe_svm_devm_owner() - Return the owner of device private memory - * @xe: The xe device. 
- * - * Return: The owner of this device's device private memory to use in - * hmm_range_fault()- - */ -static inline void *xe_svm_devm_owner(struct xe_device *xe) -{ - return xe; -} - #if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) #include <drm/drm_pagemap.h> #include <drm/drm_gpusvm.h> +#include <drm/drm_pagemap_util.h> #define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER +#define XE_INTERCONNECT_P2P (XE_INTERCONNECT_VRAM + 1) + +struct drm_device; +struct drm_file; struct xe_bo; struct xe_gt; +struct xe_device; +struct xe_vram_region; struct xe_tile; struct xe_vm; struct xe_vma; @@ -56,6 +49,24 @@ struct xe_svm_range { }; /** + * struct xe_pagemap - Manages xe device_private memory for SVM. + * @pagemap: The struct dev_pagemap providing the struct pages. + * @dpagemap: The drm_pagemap managing allocation and migration. + * @destroy_work: Handles asnynchronous destruction and caching. + * @peer: Used for pagemap owner computation. + * @hpa_base: The host physical address base for the managemd memory. + * @vr: Backpointer to the xe_vram region. + */ +struct xe_pagemap { + struct dev_pagemap pagemap; + struct drm_pagemap dpagemap; + struct work_struct destroy_work; + struct drm_pagemap_peer peer; + resource_size_t hpa_base; + struct xe_vram_region *vr; +}; + +/** * xe_svm_range_pages_valid() - SVM range pages valid * @range: SVM range * @@ -84,8 +95,8 @@ int xe_svm_bo_evict(struct xe_bo *bo); void xe_svm_range_debug(struct xe_svm_range *range, const char *operation); -int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, - const struct drm_gpusvm_ctx *ctx); +int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx, + struct drm_pagemap *dpagemap); struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, struct xe_vma *vma, struct drm_gpusvm_ctx *ctx); @@ -94,13 +105,13 @@ int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, struct drm_gpusvm_ctx *ctx); bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, - bool preferred_region_is_vram); + const struct drm_pagemap *dpagemap); void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range); bool xe_svm_range_validate(struct xe_vm *vm, struct xe_svm_range *range, - u8 tile_mask, bool devmem_preferred); + u8 tile_mask, const struct drm_pagemap *dpagemap); u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vma); @@ -110,6 +121,8 @@ u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end); struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile); +void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem); + /** * xe_svm_range_has_dma_mapping() - SVM range has DMA mapping * @range: SVM range @@ -171,6 +184,12 @@ static inline unsigned long xe_svm_range_size(struct xe_svm_range *range) void xe_svm_flush(struct xe_vm *vm); +int xe_pagemap_shrinker_create(struct xe_device *xe); + +int xe_pagemap_cache_create(struct xe_tile *tile); + +struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance); + #else #include <linux/interval_tree.h> #include "xe_vm.h" @@ -179,13 +198,14 @@ struct drm_pagemap_addr; struct drm_gpusvm_ctx; struct drm_gpusvm_range; struct xe_bo; -struct xe_gt; +struct xe_device; struct xe_vm; struct xe_vma; struct xe_tile; struct xe_vram_region; #define XE_INTERCONNECT_VRAM 1 +#define XE_INTERCONNECT_P2P (XE_INTERCONNECT_VRAM + 1) struct xe_svm_range { struct { @@ -260,8 +280,8 @@ void xe_svm_range_debug(struct 
xe_svm_range *range, const char *operation) } static inline int -xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, - const struct drm_gpusvm_ctx *ctx) +xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx, + struct drm_pagemap *dpagemap) { return -EOPNOTSUPP; } @@ -302,7 +322,7 @@ static inline unsigned long xe_svm_range_size(struct xe_svm_range *range) static inline bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, - u32 region) + const struct drm_pagemap *dpagemap) { return false; } @@ -343,9 +363,30 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t return NULL; } +static inline void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem) +{ + return NULL; +} + static inline void xe_svm_flush(struct xe_vm *vm) { } + +static inline int xe_pagemap_shrinker_create(struct xe_device *xe) +{ + return 0; +} + +static inline int xe_pagemap_cache_create(struct xe_tile *tile) +{ + return 0; +} + +static inline struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance) +{ + return ERR_PTR(-ENOENT); +} + #define xe_svm_range_has_dma_mapping(...) false #endif /* CONFIG_DRM_XE_GPUSVM */ diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 63c060c2ea5c..eb262aad11da 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -6,6 +6,7 @@ #include <linux/fault-inject.h> #include <drm/drm_managed.h> +#include <drm/drm_pagemap_util.h> #include "xe_bo.h" #include "xe_device.h" @@ -180,17 +181,19 @@ ALLOW_ERROR_INJECTION(xe_tile_init_early, ERRNO); /* See xe_pci_probe() */ int xe_tile_init_noalloc(struct xe_tile *tile) { struct xe_device *xe = tile_to_xe(tile); + int err; xe_wa_apply_tile_workarounds(tile); - if (xe->info.has_usm && IS_DGFX(xe)) - xe_devm_add(tile, tile->mem.vram); + err = xe_pagemap_cache_create(tile); + if (err) + return err; if (IS_DGFX(xe) && !ttm_resource_manager_used(&tile->mem.vram->ttm.manager)) { - int err = xe_ttm_vram_mgr_init(xe, tile->mem.vram); - + err = xe_ttm_vram_mgr_init(xe, tile->mem.vram); if (err) return err; + xe->info.mem_region_mask |= BIT(tile->mem.vram->id) << 1; } @@ -220,3 +223,26 @@ void xe_tile_migrate_wait(struct xe_tile *tile) { xe_migrate_wait(tile->migrate); } + +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +/** + * xe_tile_local_pagemap() - Return a pointer to the tile's local drm_pagemap if any + * @tile: The tile. + * + * Return: A pointer to the tile's local drm_pagemap, or NULL if local pagemap + * support has been compiled out. + */ +struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) +{ + struct drm_pagemap *dpagemap = + drm_pagemap_get_from_cache_if_active(xe_tile_to_vr(tile)->dpagemap_cache); + + if (dpagemap) { + xe_assert(tile_to_xe(tile), kref_read(&dpagemap->ref) >= 2); + drm_pagemap_put(dpagemap); + } + + return dpagemap; +} +#endif + diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index dceb6297aa01..734132eddda5 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -8,6 +8,7 @@ #include "xe_device_types.h" +struct xe_pagemap; struct xe_tile; int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id); @@ -23,4 +24,24 @@ static inline bool xe_tile_is_root(struct xe_tile *tile) return tile->id == 0; } +/** + * xe_tile_to_vr() - Return the struct xe_vram_region pointer from a + * struct xe_tile pointer + * @tile: Pointer to the struct xe_tile. 
+ * + * Return: Pointer to the struct xe_vram_region embedded in *@tile. + */ +static inline struct xe_vram_region *xe_tile_to_vr(struct xe_tile *tile) +{ + return tile->mem.vram; +} + +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile); +#else +static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) +{ + return NULL; +} +#endif #endif diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c index 0d9130b1958a..e120323c43bc 100644 --- a/drivers/gpu/drm/xe/xe_userptr.c +++ b/drivers/gpu/drm/xe/xe_userptr.c @@ -55,7 +55,7 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) struct xe_device *xe = vm->xe; struct drm_gpusvm_ctx ctx = { .read_only = xe_vma_read_only(vma), - .device_private_page_owner = xe_svm_devm_owner(xe), + .device_private_page_owner = xe_svm_private_page_owner(vm, false), .allow_mixed = true, }; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 95e22ff95ea8..a07d8b53de66 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -957,14 +957,37 @@ free_ops: return fence; } +static void xe_vma_mem_attr_fini(struct xe_vma_mem_attr *attr) +{ + drm_pagemap_put(attr->preferred_loc.dpagemap); +} + static void xe_vma_free(struct xe_vma *vma) { + xe_vma_mem_attr_fini(&vma->attr); + if (xe_vma_is_userptr(vma)) kfree(to_userptr_vma(vma)); else kfree(vma); } +/** + * xe_vma_mem_attr_copy() - copy an xe_vma_mem_attr structure. + * @to: Destination. + * @from: Source. + * + * Copies an xe_vma_mem_attr structure taking care to get reference + * counting of individual members right. + */ +void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from) +{ + xe_vma_mem_attr_fini(to); + *to = *from; + if (to->preferred_loc.dpagemap) + drm_pagemap_get(to->preferred_loc.dpagemap); +} + static struct xe_vma *xe_vma_create(struct xe_vm *vm, struct xe_bo *bo, u64 bo_offset_or_userptr, @@ -1015,8 +1038,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, if (vm->xe->info.has_atomic_enable_pte_bit) vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; - vma->attr = *attr; - + xe_vma_mem_attr_copy(&vma->attr, attr); if (bo) { struct drm_gpuvm_bo *vm_bo; @@ -2320,7 +2342,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, struct xe_tile *tile; struct xe_svm_range *svm_range; struct drm_gpusvm_ctx ctx = {}; - struct drm_pagemap *dpagemap; + struct drm_pagemap *dpagemap = NULL; u8 id, tile_mask = 0; u32 i; @@ -2338,23 +2360,17 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); op->prefetch_range.ranges_count = 0; - tile = NULL; if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { dpagemap = xe_vma_resolve_pagemap(vma, xe_device_get_root_tile(vm->xe)); - /* - * TODO: Once multigpu support is enabled will need - * something to dereference tile from dpagemap. 
- */ - if (dpagemap) - tile = xe_device_get_root_tile(vm->xe); } else if (prefetch_region) { tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - XE_PL_VRAM0]; + dpagemap = xe_tile_local_pagemap(tile); } - op->prefetch_range.tile = tile; + op->prefetch_range.dpagemap = dpagemap; alloc_next_range: svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); @@ -2373,7 +2389,7 @@ alloc_next_range: goto unwind_prefetch_ops; } - if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) { + if (xe_svm_range_validate(vm, svm_range, tile_mask, dpagemap)) { xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); goto check_next_range; } @@ -2895,7 +2911,7 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) { bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); - struct xe_tile *tile = op->prefetch_range.tile; + struct drm_pagemap *dpagemap = op->prefetch_range.dpagemap; int err = 0; struct xe_svm_range *svm_range; @@ -2908,15 +2924,22 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) ctx.read_only = xe_vma_read_only(vma); ctx.devmem_possible = devmem_possible; ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; - ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe); + ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap); /* TODO: Threading the migration */ xa_for_each(&op->prefetch_range.range, i, svm_range) { - if (!tile) + if (!dpagemap) xe_svm_range_migrate_to_smem(vm, svm_range); - if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) { - err = xe_svm_alloc_vram(tile, svm_range, &ctx); + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) { + drm_dbg(&vm->xe->drm, + "Prefetch pagemap is %s start 0x%016lx end 0x%016lx\n", + dpagemap ? dpagemap->drm->unique : "system", + xe_svm_range_start(svm_range), xe_svm_range_end(svm_range)); + } + + if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, dpagemap)) { + err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap); if (err) { drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); @@ -4324,7 +4347,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, struct drm_gpuva_op *__op; unsigned int vma_flags = 0; bool remap_op = false; - struct xe_vma_mem_attr tmp_attr; + struct xe_vma_mem_attr tmp_attr = {}; u16 default_pat; int err; @@ -4419,7 +4442,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, * VMA, so they can be assigned to newly MAP created vma. */ if (is_madvise) - tmp_attr = vma->attr; + xe_vma_mem_attr_copy(&tmp_attr, &vma->attr); xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL); } else if (__op->op == DRM_GPUVA_OP_MAP) { @@ -4429,12 +4452,13 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, * copy them to new vma. 
*/ if (is_madvise) - vma->attr = tmp_attr; + xe_vma_mem_attr_copy(&vma->attr, &tmp_attr); } } xe_vm_unlock(vm); drm_gpuva_ops_free(&vm->gpuvm, ops); + xe_vma_mem_attr_fini(&tmp_attr); return 0; unwind_ops: @@ -4532,3 +4556,4 @@ int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t r return xe_vm_alloc_vma(vm, &map_req, false); } + diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 361f10b3c453..7d11ca47d73e 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -414,4 +414,5 @@ static inline struct drm_exec *xe_vm_validation_exec(struct xe_vm *vm) #define xe_vm_has_valid_gpu_mapping(tile, tile_present, tile_invalidated) \ ((READ_ONCE(tile_present) & ~READ_ONCE(tile_invalidated)) & BIT((tile)->id)) +void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from); #endif diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index cad3cf627c3f..add9a6ca2390 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -22,6 +22,19 @@ struct xe_vmas_in_madvise_range { bool has_svm_userptr_vmas; }; +/** + * struct xe_madvise_details - Argument to madvise_funcs + * @dpagemap: Reference-counted pointer to a struct drm_pagemap. + * + * The madvise IOCTL handler may, in addition to the user-space + * args, have additional info to pass into the madvise_func that + * handles the madvise type. Use a struct_xe_madvise_details + * for that and extend the struct as necessary. + */ +struct xe_madvise_details { + struct drm_pagemap *dpagemap; +}; + static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range) { u64 addr = madvise_range->addr; @@ -74,34 +87,41 @@ static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_r static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, - struct drm_xe_madvise *op) + struct drm_xe_madvise *op, + struct xe_madvise_details *details) { int i; xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC); for (i = 0; i < num_vmas; i++) { + struct xe_vma *vma = vmas[i]; + struct xe_vma_preferred_loc *loc = &vma->attr.preferred_loc; + /*TODO: Extend attributes to bo based vmas */ - if ((vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd && - vmas[i]->attr.preferred_loc.migration_policy == - op->preferred_mem_loc.migration_policy) || - !xe_vma_is_cpu_addr_mirror(vmas[i])) { - vmas[i]->skip_invalidation = true; + if ((loc->devmem_fd == op->preferred_mem_loc.devmem_fd && + loc->migration_policy == op->preferred_mem_loc.migration_policy) || + !xe_vma_is_cpu_addr_mirror(vma)) { + vma->skip_invalidation = true; } else { - vmas[i]->skip_invalidation = false; - vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd; + vma->skip_invalidation = false; + loc->devmem_fd = op->preferred_mem_loc.devmem_fd; /* Till multi-device support is not added migration_policy * is of no use and can be ignored. 
*/ - vmas[i]->attr.preferred_loc.migration_policy = - op->preferred_mem_loc.migration_policy; + loc->migration_policy = op->preferred_mem_loc.migration_policy; + drm_pagemap_put(loc->dpagemap); + loc->dpagemap = NULL; + if (details->dpagemap) + loc->dpagemap = drm_pagemap_get(details->dpagemap); } } } static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, - struct drm_xe_madvise *op) + struct drm_xe_madvise *op, + struct xe_madvise_details *details) { struct xe_bo *bo; int i; @@ -142,7 +162,8 @@ static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm, static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, - struct drm_xe_madvise *op) + struct drm_xe_madvise *op, + struct xe_madvise_details *details) { int i; @@ -160,7 +181,8 @@ static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm, typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, - struct drm_xe_madvise *op); + struct drm_xe_madvise *op, + struct xe_madvise_details *details); static const madvise_func madvise_funcs[] = { [DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc, @@ -244,11 +266,12 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM)) return false; - if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy > - DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES)) + if (XE_IOCTL_DBG(xe, fd <= DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && + args->preferred_mem_loc.region_instance != 0)) return false; - if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad)) + if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy > + DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES)) return false; if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved)) @@ -294,6 +317,41 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv return true; } +static int xe_madvise_details_init(struct xe_vm *vm, const struct drm_xe_madvise *args, + struct xe_madvise_details *details) +{ + struct xe_device *xe = vm->xe; + + memset(details, 0, sizeof(*details)); + + if (args->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC) { + int fd = args->preferred_mem_loc.devmem_fd; + struct drm_pagemap *dpagemap; + + if (fd <= 0) + return 0; + + dpagemap = xe_drm_pagemap_from_fd(args->preferred_mem_loc.devmem_fd, + args->preferred_mem_loc.region_instance); + if (XE_IOCTL_DBG(xe, IS_ERR(dpagemap))) + return PTR_ERR(dpagemap); + + /* Don't allow a foreign placement without a fast interconnect! 
*/ + if (XE_IOCTL_DBG(xe, dpagemap->pagemap->owner != vm->svm.peer.owner)) { + drm_pagemap_put(dpagemap); + return -ENOLINK; + } + details->dpagemap = dpagemap; + } + + return 0; +} + +static void xe_madvise_details_fini(struct xe_madvise_details *details) +{ + drm_pagemap_put(details->dpagemap); +} + static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, u32 atomic_val) { @@ -347,6 +405,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil struct drm_xe_madvise *args = data; struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start, .range = args->range, }; + struct xe_madvise_details details; struct xe_vm *vm; struct drm_exec exec; int err, attr_type; @@ -371,13 +430,17 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil goto unlock_vm; } - err = xe_vm_alloc_madvise_vma(vm, args->start, args->range); + err = xe_madvise_details_init(vm, args, &details); if (err) goto unlock_vm; + err = xe_vm_alloc_madvise_vma(vm, args->start, args->range); + if (err) + goto madv_fini; + err = get_vmas(vm, &madvise_range); if (err || !madvise_range.num_vmas) - goto unlock_vm; + goto madv_fini; if (madvise_range.has_bo_vmas) { if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) { @@ -385,7 +448,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil madvise_range.num_vmas, args->atomic.val)) { err = -EINVAL; - goto unlock_vm; + goto madv_fini; } } @@ -411,7 +474,8 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil } attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs)); - madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args); + madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args, + &details); err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range); @@ -423,6 +487,8 @@ err_fini: drm_exec_fini(&exec); kfree(madvise_range.vmas); madvise_range.vmas = NULL; +madv_fini: + xe_madvise_details_fini(&details); unlock_vm: up_write(&vm->lock); put_vm: diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 18bad1dd08e6..437f64202f3b 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -8,6 +8,7 @@ #include <drm/drm_gpusvm.h> #include <drm/drm_gpuvm.h> +#include <drm/drm_pagemap_util.h> #include <linux/dma-resv.h> #include <linux/kref.h> @@ -19,6 +20,8 @@ #include "xe_range_fence.h" #include "xe_userptr.h" +struct drm_pagemap; + struct xe_bo; struct xe_svm_range; struct xe_sync_entry; @@ -53,7 +56,7 @@ struct xe_vm_pgtable_update_op; */ struct xe_vma_mem_attr { /** @preferred_loc: preferred memory_location */ - struct { + struct xe_vma_preferred_loc { /** @preferred_loc.migration_policy: Pages migration policy */ u32 migration_policy; @@ -64,6 +67,13 @@ struct xe_vma_mem_attr { * closest device memory respectively. */ u32 devmem_fd; + /** + * @preferred_loc.dpagemap: Reference-counted pointer to the drm_pagemap preferred + * for migration on a SVM page-fault. The pointer is protected by the + * vm lock, and is %NULL if @devmem_fd should be consulted for special + * values. + */ + struct drm_pagemap *dpagemap; } preferred_loc; /** @@ -191,6 +201,9 @@ struct xe_vm { */ struct work_struct work; } garbage_collector; + struct xe_pagemap *pagemaps[XE_MAX_TILES_PER_DEVICE]; + /** @svm.peer: Used for pagemap connectivity computations. 
*/ + struct drm_pagemap_peer peer; } svm; struct xe_device *xe; @@ -395,10 +408,10 @@ struct xe_vma_op_prefetch_range { /** @ranges_count: number of svm ranges to map */ u32 ranges_count; /** - * @tile: Pointer to the tile structure containing memory to prefetch. - * NULL if prefetch requested region is smem + * @dpagemap: Pointer to the dpagemap structure containing memory to prefetch. + * NULL if prefetch requested region is smem */ - struct xe_tile *tile; + struct drm_pagemap *dpagemap; }; /** enum xe_vma_op_flags - flags for VMA operation */ diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index 1b9e9b028975..c64d98bf1723 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -25,97 +25,6 @@ #include "xe_vram.h" #include "xe_vram_types.h" -static void resize_bar(struct xe_device *xe, int resno, resource_size_t size) -{ - struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - int bar_size = pci_rebar_bytes_to_size(size); - int ret; - - ret = pci_resize_resource(pdev, resno, bar_size, 0); - if (ret) { - drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n", - resno, 1 << bar_size, ERR_PTR(ret)); - return; - } - - drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size); -} - -/* - * if force_vram_bar_size is set, attempt to set to the requested size - * else set to maximum possible size - */ -void xe_vram_resize_bar(struct xe_device *xe) -{ - int force_vram_bar_size = xe_modparam.force_vram_bar_size; - struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - struct pci_bus *root = pdev->bus; - resource_size_t current_size; - resource_size_t rebar_size; - struct resource *root_res; - int max_size, i; - u32 pci_cmd; - - /* gather some relevant info */ - current_size = pci_resource_len(pdev, LMEM_BAR); - - if (force_vram_bar_size < 0) - return; - - /* set to a specific size? */ - if (force_vram_bar_size) { - rebar_size = pci_rebar_bytes_to_size(force_vram_bar_size * - (resource_size_t)SZ_1M); - - if (!pci_rebar_size_supported(pdev, LMEM_BAR, rebar_size)) { - drm_info(&xe->drm, - "Requested size: %lluMiB is not supported by rebar sizes: 0x%llx. Leaving default: %lluMiB\n", - (u64)pci_rebar_size_to_bytes(rebar_size) >> 20, - pci_rebar_get_possible_sizes(pdev, LMEM_BAR), - (u64)current_size >> 20); - return; - } - - rebar_size = pci_rebar_size_to_bytes(rebar_size); - if (rebar_size == current_size) - return; - } else { - max_size = pci_rebar_get_max_size(pdev, LMEM_BAR); - if (max_size < 0) - return; - rebar_size = pci_rebar_size_to_bytes(max_size); - - /* only resize if larger than current */ - if (rebar_size <= current_size) - return; - } - - drm_info(&xe->drm, "Attempting to resize bar from %lluMiB -> %lluMiB\n", - (u64)current_size >> 20, (u64)rebar_size >> 20); - - while (root->parent) - root = root->parent; - - pci_bus_for_each_resource(root, root_res, i) { - if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && - (u64)root_res->start > 0x100000000ul) - break; - } - - if (!root_res) { - drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing. 
Consider enabling 'Resizable BAR' support in your BIOS\n"); - return; - } - - pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); - pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); - - resize_bar(xe, LMEM_BAR, rebar_size); - - pci_assign_unassigned_bus_resources(pdev->bus); - pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); -} - static bool resource_is_valid(struct pci_dev *pdev, int bar) { if (!pci_resource_flags(pdev, bar)) diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h index 13505cfb184d..72860f714fc6 100644 --- a/drivers/gpu/drm/xe/xe_vram.h +++ b/drivers/gpu/drm/xe/xe_vram.h @@ -11,7 +11,6 @@ struct xe_device; struct xe_vram_region; -void xe_vram_resize_bar(struct xe_device *xe); int xe_vram_probe(struct xe_device *xe); struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement); diff --git a/drivers/gpu/drm/xe/xe_vram_types.h b/drivers/gpu/drm/xe/xe_vram_types.h index 83772dcbf1af..646e3c12ae9f 100644 --- a/drivers/gpu/drm/xe/xe_vram_types.h +++ b/drivers/gpu/drm/xe/xe_vram_types.h @@ -66,19 +66,8 @@ struct xe_vram_region { #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) /** @migrate: Back pointer to migrate */ struct xe_migrate *migrate; - /** @pagemap: Used to remap device memory as ZONE_DEVICE */ - struct dev_pagemap pagemap; - /** - * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory - * pages of this tile. - */ - struct drm_pagemap dpagemap; - /** - * @hpa_base: base host physical address - * - * This is generated when remap device memory as ZONE_DEVICE - */ - resource_size_t hpa_base; + /** @dpagemap_cache: drm_pagemap cache. */ + struct drm_pagemap_cache *dpagemap_cache; #endif }; diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c index 8f23a27871b6..c83ea3d48fae 100644 --- a/drivers/gpu/drm/xe/xe_vsec.c +++ b/drivers/gpu/drm/xe/xe_vsec.c @@ -158,13 +158,15 @@ int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_off guard(mutex)(&xe->pmt.lock); + if (!xe->soc_remapper.set_telem_region) + return -ENODEV; + /* indicate that we are not at an appropriate power level */ if (!xe_pm_runtime_get_if_active(xe)) return -ENODATA; /* set SoC re-mapper index register based on GUID memory region */ - xe_mmio_rmw32(xe_root_tile_mmio(xe), SG_REMAP_INDEX1, SG_REMAP_BITS, - REG_FIELD_PREP(SG_REMAP_BITS, mem_region)); + xe->soc_remapper.set_telem_region(xe, mem_region); memcpy_fromio(data, telem_addr, count); xe_pm_runtime_put(xe); |
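The xe_vm.c hunks above replace plain assignment of struct xe_vma_mem_attr with xe_vma_mem_attr_copy() and xe_vma_mem_attr_fini(), because the attributes now carry a reference-counted struct drm_pagemap pointer in preferred_loc. The following minimal sketch is an illustrative restatement of that ownership pattern, not part of the patch; all identifiers are taken from the hunks above, and it assumes drm_pagemap_put() tolerates a NULL pointer, as the new xe_vma_mem_attr_fini() implies.

	struct xe_vma_mem_attr tmp_attr = {};	/* must start zeroed: copy() and fini() put the old dpagemap */

	/* Copies the plain members and takes a drm_pagemap reference, if one is set. */
	xe_vma_mem_attr_copy(&tmp_attr, &vma->attr);

	/* ... assign the attributes to a newly created VMA, which takes its own reference ... */

	/* Drop the temporary reference on every exit path. */
	xe_vma_mem_attr_fini(&tmp_attr);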

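Taken together, the vm_bind_ioctl_ops_create() and prefetch_ranges() changes key prefetch placement off a struct drm_pagemap instead of a struct xe_tile: the pagemap is resolved either from a prior madvise preferred location (DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) or from the tile named by the legacy region index, and a NULL pagemap means plain system memory. The sketch below rearranges those two hunks into one place purely for illustration; the identifiers come from the hunks above, while error handling and the per-range loop are omitted.

	struct drm_pagemap *dpagemap = NULL;

	if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
		/* Honour a preferred location set by a previous madvise, if any. */
		dpagemap = xe_vma_resolve_pagemap(vma, xe_device_get_root_tile(vm->xe));
	} else if (prefetch_region) {
		/* A legacy region index selects that tile's local pagemap. */
		dpagemap = xe_tile_local_pagemap(&vm->xe->tiles[region_to_mem_type[prefetch_region] -
								XE_PL_VRAM0]);
	}

	/* A NULL pagemap means plain system memory: move device-private pages back. */
	if (!dpagemap)
		xe_svm_range_migrate_to_smem(vm, svm_range);

	/* Otherwise migrate the range into the VRAM backing the chosen pagemap. */
	if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, dpagemap))
		err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap);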