summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-04-15 08:45:00 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2026-04-15 08:45:00 -0700
commit4a57e0913e8c7fff407e97909f4ae48caa84d612 (patch)
tree96c5d9056a7f1dcaaca5f00749a298a60967b01b /include/linux
parentafac4c66d1aa6396ce44d94fe895d7b61e085fd4 (diff)
parent83e8d8bbffa8161e94f3aeee4dd09a35062a78c8 (diff)
Merge tag 'drm-next-2026-04-15' of https://gitlab.freedesktop.org/drm/kernel
Pull drm updates from Dave Airlie: "Highlights: - new DRM RAS infrastructure using netlink - amdgpu: enable DC on CIK APUs, and more IP enablement, and more user queue work - xe: purgeable BO support, and new hw enablement - dma-buf : add revocable operations Full summary: mm: - two-pass MMU interval notifiers - add gpu active/reclaim per-node stat counters math: - provide __KERNEL_DIV_ROUND_CLOSEST() in UAPI - implement DIV_ROUND_CLOSEST() with __KERNEL_DIV_ROUND_CLOSEST() rust: - shared tag with driver-core: register macro and io infra - core: rework DMA coherent API - core: add interop::list to interop with C linked lists - core: add more num::Bounded operations - core: enable generic_arg_infer and add EMSGSIZE - workqueue: add ARef<T> support for work and delayed work - add GPU buddy allocator abstraction - add DRM shmem GEM helper abstraction - allow drm:::Device to dispatch work and delayed work items to driver private data - add dma_resv_lock helper and raw accessors core: - introduce DRM RAS infrastructure over netlink - add connector panel_type property - fourcc: add ARM interleaved 64k modifier - colorop: add destroy helper - suballoc: split into alloc and init helpers - mode: provide DRM_ARGB_GET*() macros for reading color components edid: - provide drm_output_color_Format dma-buf: - provide revoke mechanism for shared buffers - rename move_notify to invalidate_mappings - always enable move_notify - protect dma_fence_ops with RCU and improve locking - clean pages with helpers atomic: - allocate drm_private_state via callback - helper: use system_percpu_wq buddy: - make buddy allocator available to gpu level - add kernel-doc for buddy allocator - improve aligned allocation ttm: - fix fence signalling - improve tests and docs - improve handling of gfp_retry_mayfail - use per-node stat counters to track memory allocations - port pool to use list_lru - drop NUMA specific pools - make pool shrinker numa aware - track allocated pages per numa node coreboot: - cleanup coreboot framebuffer support sched: - fix race condition in drm_sched_fini pagemap: - enable THP support - pass pagemap_addr by reference gem-shmem: - Track page accessed/dirty status across mmap/vmap gpusvm: - reenable device to device migration - fix unbalanced unclock bridge: - anx7625: Support USB-C plus DT bindings - connector: Fix EDID detection - dw-hdmi-qp: Support Vendor-Specfic and SDP Infoframes; improve others - fsl-ldb: Fix visual artifacts plus related DT property 'enable-termination-resistor' - imx8qxp-pixel-link: Improve bridge reference handling - lt9611: Support Port-B-only input plus DT bindings - tda998x: Support DRM_BRIDGE_ATTACH_NO_CONNECTOR; Clean up - Support TH1520 HDMI plus DT bindings - waveshare-dsi: Fix register and attach; Support 1..4 DSI lanes plus DT bindings - anx7625: Fix USB Type-C handling - cdns-mhdp8546-core: Handle HDCP state in bridge atomic_check - Support Lontium LT8713SX DP MST bridge plus DT bindings - analogix_dp: Use DP helpers for link training panel: - panel-jdi-lt070me05000: Use mipi-dsi multi functions - panel-edp: Support Add AUO B116XAT04.1 (HW: 1A); Support CMN N116BCL-EAK (C2); Support FriendlyELEC plus DT changes - panel-edp: Fix timings for BOE NV140WUM-N64 - ilitek-ili9882t: Allow GPIO calls to sleep - jadard: Support TAIGUAN XTI05101-01A - lxd: Support LXD M9189A plus DT bindings - mantix: Fix pixel clock; Clean up - motorola: Support Motorola Atrix 4G and Droid X2 plus DT bindings - novatek: Support Novatek/Tianma NT37700F plus DT bindings - simple: Support EDT ET057023UDBA plus DT bindings; Support Powertip PH800480T032-ZHC19 plus DT bindings; Support Waveshare 13.3" - novatek-nt36672a: Use mipi_dsi_*_multi() functions - panel-edp: Support BOE NV153WUM-N42, CMN N153JCA-ELK, CSW MNF307QS3-2 - support Himax HX83121A plus DT bindings - support JuTouch JT070TM041 plus DT bindings - support Samsung S6E8FC0 plus DT bindings - himax-hx83102c: support Samsung S6E8FC0 plus DT bindings; support backlight - ili9806e: support Rocktech RK050HR345-CT106A plus DT bindings - simple: support Tianma TM050RDH03 plus DT bindings amdgpu: - enable DC by default on CIK APUs - userq fence ioctl param size fixes - set panel_type to OLED for eDP - refactor DC i2c code - FAMS2 update - rework ttm handling to allow multiple engines - DC DCE 6.x cleanup - DC support for NUTMEG/TRAVIS DP bridge - DCN 4.2 support - GC12 idle power fix for compute - use struct drm_edid in non-DC code - enable NV12/P010 support on primary planes - support newer IP discovery tables - VCN/JPEG 5.0.2 support - GC/MES 12.1 updates - USERQ fixes - add DC idle state manager - eDP DSC seamless boot amdkfd: - GC 12.1 updates - non 4K page fixes xe: - basic Xe3p_LPG and NVL-P enabling patches - allow VM_BIND decompress support - add purgeable buffer object support - add xe_vm_get_property_ioctl - restrict multi-lrc to VCS/VECS engines - allow disabling VM overcommit in fault mode - dGPU memory optimizations - Workaround cleanups and simplification - Allow VFs VRAM quote changes using sysfs - convert GT stats to per-cpu counters - pagefault refactors - enable multi-queue on xe3p_xpc - disable DCC on PTL - make MMIO communication more robust - disable D3Cold for BMG on specific platforms - vfio: improve FLR sync for Xe VFIO i915/display: - C10/C20/LT PHY PLL divider verification - use trans push mechanism to generate PSR frame change on LNL+ - refactor DP DSC slice config - VGA decode refactoring - refactor DPT, gen2-4 overlay, masked field register macro helpers - refactor stolen memory allocation decisions - prepare for UHBR DP tunnels - refactor LT PHY PLL to use DPLL framework - implement register polling/waiting in display code - add shared stepping header between i915 and display i915: - fix potential overflow of shmem scatterlist length nouveau: - provide Z cull info to userspace - initial GA100 support - shutdown on PCI device shutdown nova-core: - harden GSP command queue - add support for large RPCs - simplify GSP sequencer and message handling - refactor falcon firmware handling - convert to new register macro - conver to new DMA coherent API - use checked arithmetic - add debugfs support for gsp-rm log buffers - fix aux device registration for multi-GPU msm: - CI: - Uprev mesa - Restore CI jobs for Qualcomm APQ8016 and APQ8096 devices - Core: - Switched to of_get_available_child_by_name() - DPU: - Fixes for DSC panels - Fixed brownout because of the frequency / OPP mismatch - Quad pipe preparation (not enabled yet) - Switched to virtual planes by default - Dropped VBIF_NRT support - Added support for Eliza platform - Reworked alpha handling - Switched to correct CWB definitions on Eliza - Dropped dummy INTF_0 on MSM8953 - Corrected INTFs related to DP-MST - DP: - Removed debug prints looking into PHY internals - DSI: - Fixes for DSC panels - RGB101010 support - Support for SC8280XP - Moved PHY bindings from display/ to phy/ - GPU: - Preemption support for x2-85 and a840 - IFPC support for a840 - SKU detection support for x2-85 and a840 - Expose AQE support (VK ray-pipeline) - Avoid locking in VM_BIND fence signaling path - Fix to avoid reclaim in GPU snapshot path - Disallow foreign mapping of _NO_SHARE BOs - HDMI: - Fixed infoframes programming - MDP5: - Dropped support for MSM8974v1 - Dropped now unused code for MSM8974 v1 and SDM660 / MSM8998 panthor: - add tracepoints for power and IRQs - fix fence handling - extend timestamp query with flags - support various sources for timestamp queries tyr: - fix names and model/versions rockchip: - vop2: use drm logging function - rk3576 displayport support - support CRTC background color atmel-hlcdc: - support sana5d65 LCD controller tilcdc: - use DT bindings schema - use managed DRM interfaces - support DRM_BRIDGE_ATTACH_NO_CONNECTOR verisilicon: - support DC8200 + DT bindings virtgpu: - support PRIME import with 3D enabled komeda: - fix integer overflow in AFBC checks mcde: - improve bridge handling gma500: - use drm client buffer for fbdev framebuffer amdxdna: - add sensors ioctls - provide NPU power estimate - support column utilization sensor - allow forcing DMA through IOMMU IOVA - support per-BO mem usage queries - refactor GEM implementation ivpu: - update boot API to v3.29.4 - limit per-user number of doorbells/contexts - perform engine reset on TDR error loongson: - replace custom code with drm_gem_ttm_dumb_map_offset() imx: - support planes behind the primary plane - fix bus-format selection vkms: - support CRTC background color v3d: - improve handling of struct v3d_stats komeda: - support Arm China Linlon D6 plus DT bindings imagination: - improve power-off sequence - support context-reset notification from firmware mediatek: - mtk_dsi: enable hs clock during pre-enable - Remove all conflicting aperture devices during probe - Add support for mt8167 display blocks" * tag 'drm-next-2026-04-15' of https://gitlab.freedesktop.org/drm/kernel: (1735 commits) drm/ttm/tests: Remove checks from ttm_pool_free_no_dma_alloc drm/ttm/tests: fix lru_count ASSERT drm/vram: remove DRM_VRAM_MM_FILE_OPERATIONS from docs drm/fb-helper: Fix a locking bug in an error path dma-fence: correct kernel-doc function parameter @flags ttm/pool: track allocated_pages per numa node. ttm/pool: make pool shrinker NUMA aware (v2) ttm/pool: drop numa specific pools ttm/pool: port to list_lru. (v2) drm/ttm: use gpu mm stats to track gpu memory allocations. (v4) mm: add gpu active/reclaim per-node stat counters (v2) gpu: nova-core: fix missing colon in SEC2 boot debug message gpu: nova-core: vbios: use from_le_bytes() for PCI ROM header parsing gpu: nova-core: bitfield: fix broken Default implementation gpu: nova-core: falcon: pad firmware DMA object size to required block alignment gpu: nova-core: gsp: fix undefined behavior in command queue code drm/shmem_helper: Make sure PMD entries get the writeable upgrade accel/ivpu: Trigger recovery on TDR with OS scheduling drm/msm: Use of_get_available_child_by_name() dt-bindings: display/msm: move DSI PHY bindings to phy/ subdir ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/coreboot.h90
-rw-r--r--include/linux/dma-buf.h17
-rw-r--r--include/linux/dma-fence-array.h1
-rw-r--r--include/linux/dma-fence-chain.h1
-rw-r--r--include/linux/dma-fence.h97
-rw-r--r--include/linux/gpu_buddy.h241
-rw-r--r--include/linux/iopoll.h8
-rw-r--r--include/linux/math.h18
-rw-r--r--include/linux/mmu_notifier.h42
-rw-r--r--include/linux/mmzone.h2
10 files changed, 477 insertions, 40 deletions
diff --git a/include/linux/coreboot.h b/include/linux/coreboot.h
new file mode 100644
index 000000000000..5d40ca7a1d89
--- /dev/null
+++ b/include/linux/coreboot.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * coreboot.h
+ *
+ * Coreboot device and driver interfaces.
+ *
+ * Copyright 2014 Gerd Hoffmann <kraxel@redhat.com>
+ * Copyright 2017 Google Inc.
+ * Copyright 2017 Samuel Holland <samuel@sholland.org>
+ */
+
+#ifndef _LINUX_COREBOOT_H
+#define _LINUX_COREBOOT_H
+
+#include <linux/compiler_attributes.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+typedef __aligned(4) u64 cb_u64;
+
+/* List of coreboot entry structures that is used */
+
+#define CB_TAG_FRAMEBUFFER 0x12
+#define LB_TAG_CBMEM_ENTRY 0x31
+
+/* Generic */
+struct coreboot_table_entry {
+ u32 tag;
+ u32 size;
+};
+
+/* Points to a CBMEM entry */
+struct lb_cbmem_ref {
+ u32 tag;
+ u32 size;
+
+ cb_u64 cbmem_addr;
+};
+
+/* Corresponds to LB_TAG_CBMEM_ENTRY */
+struct lb_cbmem_entry {
+ u32 tag;
+ u32 size;
+
+ cb_u64 address;
+ u32 entry_size;
+ u32 id;
+};
+
+#define LB_FRAMEBUFFER_ORIENTATION_NORMAL 0
+#define LB_FRAMEBUFFER_ORIENTATION_BOTTOM_UP 1
+#define LB_FRAMEBUFFER_ORIENTATION_LEFT_UP 2
+#define LB_FRAMEBUFFER_ORIENTATION_RIGHT_UP 3
+
+/* Describes framebuffer setup by coreboot */
+struct lb_framebuffer {
+ u32 tag;
+ u32 size;
+
+ cb_u64 physical_address;
+ u32 x_resolution;
+ u32 y_resolution;
+ u32 bytes_per_line;
+ u8 bits_per_pixel;
+ u8 red_mask_pos;
+ u8 red_mask_size;
+ u8 green_mask_pos;
+ u8 green_mask_size;
+ u8 blue_mask_pos;
+ u8 blue_mask_size;
+ u8 reserved_mask_pos;
+ u8 reserved_mask_size;
+ u8 orientation;
+};
+
+/*
+ * True if the coreboot-provided data is large enough to hold information
+ * on the linear framebuffer. False otherwise.
+ */
+#define LB_FRAMEBUFFER_HAS_LFB(__fb) \
+ ((__fb)->size >= offsetofend(struct lb_framebuffer, reserved_mask_size))
+
+/*
+ * True if the coreboot-provided data is large enough to hold information
+ * on the display orientation. False otherwise.
+ */
+#define LB_FRAMEBUFFER_HAS_ORIENTATION(__fb) \
+ ((__fb)->size >= offsetofend(struct lb_framebuffer, orientation))
+
+#endif /* _LINUX_COREBOOT_H */
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 133b9e637b55..166933b82e27 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -407,7 +407,7 @@ struct dma_buf {
* through the device.
*
* - Dynamic importers should set fences for any access that they can't
- * disable immediately from their &dma_buf_attach_ops.move_notify
+ * disable immediately from their &dma_buf_attach_ops.invalidate_mappings
* callback.
*
* IMPORTANT:
@@ -446,7 +446,7 @@ struct dma_buf_attach_ops {
bool allow_peer2peer;
/**
- * @move_notify: [optional] notification that the DMA-buf is moving
+ * @invalidate_mappings: [optional] notification that the DMA-buf is moving
*
* If this callback is provided the framework can avoid pinning the
* backing store while mappings exists.
@@ -456,14 +456,10 @@ struct dma_buf_attach_ops {
* called with this lock held as well. This makes sure that no mapping
* is created concurrently with an ongoing move operation.
*
- * Mappings stay valid and are not directly affected by this callback.
- * But the DMA-buf can now be in a different physical location, so all
- * mappings should be destroyed and re-created as soon as possible.
- *
- * New mappings can be created after this callback returns, and will
- * point to the new location of the DMA-buf.
+ * See the kdoc for dma_buf_invalidate_mappings() for details on the
+ * required behavior.
*/
- void (*move_notify)(struct dma_buf_attachment *attach);
+ void (*invalidate_mappings)(struct dma_buf_attachment *attach);
};
/**
@@ -578,7 +574,8 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
enum dma_data_direction);
void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
enum dma_data_direction);
-void dma_buf_move_notify(struct dma_buf *dma_buf);
+void dma_buf_invalidate_mappings(struct dma_buf *dma_buf);
+bool dma_buf_attach_revocable(struct dma_buf_attachment *attach);
int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
enum dma_data_direction dir);
int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h
index 079b3dec0a16..370b3d2bba37 100644
--- a/include/linux/dma-fence-array.h
+++ b/include/linux/dma-fence-array.h
@@ -38,7 +38,6 @@ struct dma_fence_array_cb {
struct dma_fence_array {
struct dma_fence base;
- spinlock_t lock;
unsigned num_fences;
atomic_t num_pending;
struct dma_fence **fences;
diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h
index 5cd3ba53b4a1..df3beadf1515 100644
--- a/include/linux/dma-fence-chain.h
+++ b/include/linux/dma-fence-chain.h
@@ -46,7 +46,6 @@ struct dma_fence_chain {
*/
struct irq_work work;
};
- spinlock_t lock;
};
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index d4c92fd35092..b52ab692b22e 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -34,7 +34,8 @@ struct seq_file;
* @ops: dma_fence_ops associated with this fence
* @rcu: used for releasing fence with kfree_rcu
* @cb_list: list of all callbacks to call
- * @lock: spin_lock_irqsave used for locking
+ * @extern_lock: external spin_lock_irqsave used for locking (deprecated)
+ * @inline_lock: alternative internal spin_lock_irqsave used for locking
* @context: execution context this fence belongs to, returned by
* dma_fence_context_alloc()
* @seqno: the sequence number of this fence inside the execution context,
@@ -48,6 +49,8 @@ struct seq_file;
* atomic ops (bit_*), so taking the spinlock will not be needed most
* of the time.
*
+ * DMA_FENCE_FLAG_INITIALIZED_BIT - fence was initialized
+ * DMA_FENCE_FLAG_INLINE_LOCK_BIT - use inline spinlock instead of external one
* DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
* DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling
* DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called
@@ -65,8 +68,11 @@ struct seq_file;
* been completed, or never called at all.
*/
struct dma_fence {
- spinlock_t *lock;
- const struct dma_fence_ops *ops;
+ union {
+ spinlock_t *extern_lock;
+ spinlock_t inline_lock;
+ };
+ const struct dma_fence_ops __rcu *ops;
/*
* We clear the callback list on kref_put so that by the time we
* release the fence it is unused. No one should be adding to the
@@ -98,6 +104,8 @@ struct dma_fence {
};
enum dma_fence_flag_bits {
+ DMA_FENCE_FLAG_INITIALIZED_BIT,
+ DMA_FENCE_FLAG_INLINE_LOCK_BIT,
DMA_FENCE_FLAG_SEQNO64_BIT,
DMA_FENCE_FLAG_SIGNALED_BIT,
DMA_FENCE_FLAG_TIMESTAMP_BIT,
@@ -218,6 +226,10 @@ struct dma_fence_ops {
* timed out. Can also return other error values on custom implementations,
* which should be treated as if the fence is signaled. For example a hardware
* lockup could be reported like that.
+ *
+ * Implementing this callback prevents the fence from detaching after
+ * signaling and so it is necessary for the module providing the
+ * dma_fence_ops to stay loaded as long as the dma_fence exists.
*/
signed long (*wait)(struct dma_fence *fence,
bool intr, signed long timeout);
@@ -229,6 +241,13 @@ struct dma_fence_ops {
* Can be called from irq context. This callback is optional. If it is
* NULL, then dma_fence_free() is instead called as the default
* implementation.
+ *
+ * Implementing this callback prevents the fence from detaching after
+ * signaling and so it is necessary for the module providing the
+ * dma_fence_ops to stay loaded as long as the dma_fence exists.
+ *
+ * If the callback is implemented the memory backing the dma_fence
+ * object must be freed RCU safe.
*/
void (*release)(struct dma_fence *fence);
@@ -264,6 +283,19 @@ void dma_fence_free(struct dma_fence *fence);
void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq);
/**
+ * dma_fence_was_initialized - test if fence was initialized
+ * @fence: fence to test
+ *
+ * Return: True if fence was ever initialized, false otherwise. Works correctly
+ * only when memory backing the fence structure is zero initialized on
+ * allocation.
+ */
+static inline bool dma_fence_was_initialized(struct dma_fence *fence)
+{
+ return fence && test_bit(DMA_FENCE_FLAG_INITIALIZED_BIT, &fence->flags);
+}
+
+/**
* dma_fence_put - decreases refcount of the fence
* @fence: fence to reduce refcount of
*/
@@ -351,6 +383,45 @@ dma_fence_get_rcu_safe(struct dma_fence __rcu **fencep)
} while (1);
}
+/**
+ * dma_fence_spinlock - return pointer to the spinlock protecting the fence
+ * @fence: the fence to get the lock from
+ *
+ * Return either the pointer to the embedded or the external spin lock.
+ */
+static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence)
+{
+ return test_bit(DMA_FENCE_FLAG_INLINE_LOCK_BIT, &fence->flags) ?
+ &fence->inline_lock : fence->extern_lock;
+}
+
+/**
+ * dma_fence_lock_irqsave - irqsave lock the fence
+ * @fence: the fence to lock
+ * @flags: where to store the CPU flags.
+ *
+ * Lock the fence, preventing it from changing to the signaled state.
+ */
+#define dma_fence_lock_irqsave(fence, flags) \
+ spin_lock_irqsave(dma_fence_spinlock(fence), flags)
+
+/**
+ * dma_fence_unlock_irqrestore - unlock the fence and irqrestore
+ * @fence: the fence to unlock
+ * @flags: the CPU flags to restore
+ *
+ * Unlock the fence, allowing it to change its state to signaled again.
+ */
+#define dma_fence_unlock_irqrestore(fence, flags) \
+ spin_unlock_irqrestore(dma_fence_spinlock(fence), flags)
+
+/**
+ * dma_fence_assert_held - lockdep assertion that fence is locked
+ * @fence: the fence which should be locked
+ */
+#define dma_fence_assert_held(fence) \
+ lockdep_assert_held(dma_fence_spinlock(fence));
+
#ifdef CONFIG_LOCKDEP
bool dma_fence_begin_signalling(void);
void dma_fence_end_signalling(bool cookie);
@@ -439,13 +510,19 @@ dma_fence_test_signaled_flag(struct dma_fence *fence)
static inline bool
dma_fence_is_signaled_locked(struct dma_fence *fence)
{
+ const struct dma_fence_ops *ops;
+
if (dma_fence_test_signaled_flag(fence))
return true;
- if (fence->ops->signaled && fence->ops->signaled(fence)) {
+ rcu_read_lock();
+ ops = rcu_dereference(fence->ops);
+ if (ops && ops->signaled && ops->signaled(fence)) {
+ rcu_read_unlock();
dma_fence_signal_locked(fence);
return true;
}
+ rcu_read_unlock();
return false;
}
@@ -469,13 +546,19 @@ dma_fence_is_signaled_locked(struct dma_fence *fence)
static inline bool
dma_fence_is_signaled(struct dma_fence *fence)
{
+ const struct dma_fence_ops *ops;
+
if (dma_fence_test_signaled_flag(fence))
return true;
- if (fence->ops->signaled && fence->ops->signaled(fence)) {
+ rcu_read_lock();
+ ops = rcu_dereference(fence->ops);
+ if (ops && ops->signaled && ops->signaled(fence)) {
+ rcu_read_unlock();
dma_fence_signal(fence);
return true;
}
+ rcu_read_unlock();
return false;
}
@@ -680,7 +763,7 @@ extern const struct dma_fence_ops dma_fence_chain_ops;
*/
static inline bool dma_fence_is_array(struct dma_fence *fence)
{
- return fence->ops == &dma_fence_array_ops;
+ return rcu_access_pointer(fence->ops) == &dma_fence_array_ops;
}
/**
@@ -691,7 +774,7 @@ static inline bool dma_fence_is_array(struct dma_fence *fence)
*/
static inline bool dma_fence_is_chain(struct dma_fence *fence)
{
- return fence->ops == &dma_fence_chain_ops;
+ return rcu_access_pointer(fence->ops) == &dma_fence_chain_ops;
}
/**
diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
new file mode 100644
index 000000000000..5fa917ba5450
--- /dev/null
+++ b/include/linux/gpu_buddy.h
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __GPU_BUDDY_H__
+#define __GPU_BUDDY_H__
+
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/rbtree.h>
+#include <linux/rbtree_augmented.h>
+
+/**
+ * GPU_BUDDY_RANGE_ALLOCATION - Allocate within a specific address range
+ *
+ * When set, allocation is restricted to the range [start, end) specified
+ * in gpu_buddy_alloc_blocks(). Without this flag, start/end are ignored
+ * and allocation can use any free space.
+ */
+#define GPU_BUDDY_RANGE_ALLOCATION BIT(0)
+
+/**
+ * GPU_BUDDY_TOPDOWN_ALLOCATION - Allocate from top of address space
+ *
+ * Allocate starting from high addresses and working down. Useful for
+ * separating different allocation types (e.g., kernel vs userspace)
+ * to reduce fragmentation.
+ */
+#define GPU_BUDDY_TOPDOWN_ALLOCATION BIT(1)
+
+/**
+ * GPU_BUDDY_CONTIGUOUS_ALLOCATION - Require physically contiguous blocks
+ *
+ * The allocation must be satisfied with a single contiguous block.
+ * If the requested size cannot be allocated contiguously, the
+ * allocation fails with -ENOSPC.
+ */
+#define GPU_BUDDY_CONTIGUOUS_ALLOCATION BIT(2)
+
+/**
+ * GPU_BUDDY_CLEAR_ALLOCATION - Prefer pre-cleared (zeroed) memory
+ *
+ * Attempt to allocate from the clear tree first. If insufficient clear
+ * memory is available, falls back to dirty memory. Useful when the
+ * caller needs zeroed memory and wants to avoid GPU clear operations.
+ */
+#define GPU_BUDDY_CLEAR_ALLOCATION BIT(3)
+
+/**
+ * GPU_BUDDY_CLEARED - Mark returned blocks as cleared
+ *
+ * Used with gpu_buddy_free_list() to indicate that the memory being
+ * freed has been cleared (zeroed). The blocks will be placed in the
+ * clear tree for future GPU_BUDDY_CLEAR_ALLOCATION requests.
+ */
+#define GPU_BUDDY_CLEARED BIT(4)
+
+/**
+ * GPU_BUDDY_TRIM_DISABLE - Disable automatic block trimming
+ *
+ * By default, if an allocation is smaller than the allocated block,
+ * excess memory is trimmed and returned to the free pool. This flag
+ * disables trimming, keeping the full power-of-two block size.
+ */
+#define GPU_BUDDY_TRIM_DISABLE BIT(5)
+
+enum gpu_buddy_free_tree {
+ GPU_BUDDY_CLEAR_TREE = 0,
+ GPU_BUDDY_DIRTY_TREE,
+ GPU_BUDDY_MAX_FREE_TREES,
+};
+
+#define for_each_free_tree(tree) \
+ for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++)
+
+/**
+ * struct gpu_buddy_block - Block within a buddy allocator
+ *
+ * Each block in the buddy allocator is represented by this structure.
+ * Blocks are organized in a binary tree where each parent block can be
+ * split into two children (left and right buddies). The allocator manages
+ * blocks at various orders (power-of-2 sizes) from chunk_size up to the
+ * largest contiguous region.
+ *
+ * @private: Private data owned by the allocator user (e.g., driver-specific data)
+ * @link: List node for user ownership while block is allocated
+ */
+struct gpu_buddy_block {
+/* private: */
+ /*
+ * Header bit layout:
+ * - Bits 63:12: block offset within the address space
+ * - Bits 11:10: state (ALLOCATED, FREE, or SPLIT)
+ * - Bit 9: clear bit (1 if memory is zeroed)
+ * - Bits 8:6: reserved
+ * - Bits 5:0: order (log2 of size relative to chunk_size)
+ */
+#define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
+#define GPU_BUDDY_HEADER_STATE GENMASK_ULL(11, 10)
+#define GPU_BUDDY_ALLOCATED (1 << 10)
+#define GPU_BUDDY_FREE (2 << 10)
+#define GPU_BUDDY_SPLIT (3 << 10)
+#define GPU_BUDDY_HEADER_CLEAR GENMASK_ULL(9, 9)
+/* Free to be used, if needed in the future */
+#define GPU_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
+#define GPU_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0)
+ u64 header;
+
+ struct gpu_buddy_block *left;
+ struct gpu_buddy_block *right;
+ struct gpu_buddy_block *parent;
+/* public: */
+ void *private; /* owned by creator */
+
+ /*
+ * While the block is allocated by the user through gpu_buddy_alloc*,
+ * the user has ownership of the link, for example to maintain within
+ * a list, if so desired. As soon as the block is freed with
+ * gpu_buddy_free* ownership is given back to the mm.
+ */
+ union {
+/* private: */
+ struct rb_node rb;
+/* public: */
+ struct list_head link;
+ };
+/* private: */
+ struct list_head tmp_link;
+ unsigned int subtree_max_alignment;
+};
+
+/* Order-zero must be at least SZ_4K */
+#define GPU_BUDDY_MAX_ORDER (63 - 12)
+
+/**
+ * struct gpu_buddy - GPU binary buddy allocator
+ *
+ * The buddy allocator provides efficient power-of-two memory allocation
+ * with fast allocation and free operations. It is commonly used for GPU
+ * memory management where allocations can be split into power-of-two
+ * block sizes.
+ *
+ * Locking should be handled by the user; a simple mutex around
+ * gpu_buddy_alloc_blocks() and gpu_buddy_free_block()/gpu_buddy_free_list()
+ * should suffice.
+ *
+ * @n_roots: Number of root blocks in the roots array.
+ * @max_order: Maximum block order (log2 of largest block size / chunk_size).
+ * @chunk_size: Minimum allocation granularity in bytes. Must be at least SZ_4K.
+ * @size: Total size of the address space managed by this allocator in bytes.
+ * @avail: Total free space currently available for allocation in bytes.
+ * @clear_avail: Free space available in the clear tree (zeroed memory) in bytes.
+ * This is a subset of @avail.
+ */
+struct gpu_buddy {
+/* private: */
+ /*
+ * Array of red-black trees for free block management.
+ * Indexed as free_trees[clear/dirty][order] where:
+ * - Index 0 (GPU_BUDDY_CLEAR_TREE): blocks with zeroed content
+ * - Index 1 (GPU_BUDDY_DIRTY_TREE): blocks with unknown content
+ * Each tree holds free blocks of the corresponding order.
+ */
+ struct rb_root **free_trees;
+ /*
+ * Array of root blocks representing the top-level blocks of the
+ * binary tree(s). Multiple roots exist when the total size is not
+ * a power of two, with each root being the largest power-of-two
+ * that fits in the remaining space.
+ */
+ struct gpu_buddy_block **roots;
+/* public: */
+ unsigned int n_roots;
+ unsigned int max_order;
+ u64 chunk_size;
+ u64 size;
+ u64 avail;
+ u64 clear_avail;
+};
+
+static inline u64
+gpu_buddy_block_offset(const struct gpu_buddy_block *block)
+{
+ return block->header & GPU_BUDDY_HEADER_OFFSET;
+}
+
+static inline unsigned int
+gpu_buddy_block_order(struct gpu_buddy_block *block)
+{
+ return block->header & GPU_BUDDY_HEADER_ORDER;
+}
+
+static inline bool
+gpu_buddy_block_is_free(struct gpu_buddy_block *block)
+{
+ return (block->header & GPU_BUDDY_HEADER_STATE) == GPU_BUDDY_FREE;
+}
+
+static inline bool
+gpu_buddy_block_is_clear(struct gpu_buddy_block *block)
+{
+ return block->header & GPU_BUDDY_HEADER_CLEAR;
+}
+
+static inline u64
+gpu_buddy_block_size(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block)
+{
+ return mm->chunk_size << gpu_buddy_block_order(block);
+}
+
+int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size);
+
+void gpu_buddy_fini(struct gpu_buddy *mm);
+
+int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
+ u64 start, u64 end, u64 size,
+ u64 min_page_size,
+ struct list_head *blocks,
+ unsigned long flags);
+
+int gpu_buddy_block_trim(struct gpu_buddy *mm,
+ u64 *start,
+ u64 new_size,
+ struct list_head *blocks);
+
+void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear);
+
+void gpu_buddy_free_block(struct gpu_buddy *mm, struct gpu_buddy_block *block);
+
+void gpu_buddy_free_list(struct gpu_buddy *mm,
+ struct list_head *objects,
+ unsigned int flags);
+
+void gpu_buddy_print(struct gpu_buddy *mm);
+void gpu_buddy_block_print(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block);
+#endif
diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
index bdd2e0652bc3..53edd69acb9b 100644
--- a/include/linux/iopoll.h
+++ b/include/linux/iopoll.h
@@ -159,7 +159,7 @@
*
* This macro does not rely on timekeeping. Hence it is safe to call even when
* timekeeping is suspended, at the expense of an underestimation of wall clock
- * time, which is rather minimal with a non-zero delay_us.
+ * time, which is rather minimal with a non-zero @delay_us.
*
* When available, you'll probably want to use one of the specialized
* macros defined below rather than this macro directly.
@@ -167,9 +167,9 @@
* Returns: 0 on success and -ETIMEDOUT upon a timeout. In either
* case, the last read value at @args is stored in @val.
*/
-#define read_poll_timeout_atomic(op, val, cond, sleep_us, timeout_us, \
- sleep_before_read, args...) \
- poll_timeout_us_atomic((val) = op(args), cond, sleep_us, timeout_us, sleep_before_read)
+#define read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, \
+ delay_before_read, args...) \
+ poll_timeout_us_atomic((val) = op(args), cond, delay_us, timeout_us, delay_before_read)
/**
* readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs
diff --git a/include/linux/math.h b/include/linux/math.h
index 6dc1d1d32fbc..1e8fb3efbc8c 100644
--- a/include/linux/math.h
+++ b/include/linux/math.h
@@ -89,23 +89,7 @@
} \
)
-/*
- * Divide positive or negative dividend by positive or negative divisor
- * and round to closest integer. Result is undefined for negative
- * divisors if the dividend variable type is unsigned and for negative
- * dividends if the divisor variable type is unsigned.
- */
-#define DIV_ROUND_CLOSEST(x, divisor)( \
-{ \
- typeof(x) __x = x; \
- typeof(divisor) __d = divisor; \
- (((typeof(x))-1) > 0 || \
- ((typeof(divisor))-1) > 0 || \
- (((__x) > 0) == ((__d) > 0))) ? \
- (((__x) + ((__d) / 2)) / (__d)) : \
- (((__x) - ((__d) / 2)) / (__d)); \
-} \
-)
+#define DIV_ROUND_CLOSEST __KERNEL_DIV_ROUND_CLOSEST
/*
* Same as above but for u64 dividends. divisor must be a 32-bit
* number.
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 8450e18a87c2..0da15adb4aac 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -234,15 +234,57 @@ struct mmu_notifier {
};
/**
+ * struct mmu_interval_notifier_finish - mmu_interval_notifier two-pass abstraction
+ * @link: Lockless list link for the notifiers pending pass list
+ * @notifier: The mmu_interval_notifier for which the finish pass is called.
+ *
+ * Allocate, typically using GFP_NOWAIT in the interval notifier's start pass.
+ * Note that with a large number of notifiers implementing two passes,
+ * allocation with GFP_NOWAIT will become increasingly likely to fail, so consider
+ * implementing a small pool instead of using kmalloc() allocations.
+ *
+ * If the implementation needs to pass data between the start and the finish passes,
+ * the recommended way is to embed struct mmu_interval_notifier_finish into a larger
+ * structure that also contains the data needed to be shared. Keep in mind that
+ * a notifier callback can be invoked in parallel, and each invocation needs its
+ * own struct mmu_interval_notifier_finish.
+ *
+ * If allocation fails, then the &mmu_interval_notifier_ops->invalidate_start op
+ * needs to implements the full notifier functionality. Please refer to its
+ * documentation.
+ */
+struct mmu_interval_notifier_finish {
+ struct llist_node link;
+ struct mmu_interval_notifier *notifier;
+};
+
+/**
* struct mmu_interval_notifier_ops - callback for range notification
* @invalidate: Upon return the caller must stop using any SPTEs within this
* range. This function can sleep. Return false only if sleeping
* was required but mmu_notifier_range_blockable(range) is false.
+ * @invalidate_start: Similar to @invalidate, but intended for two-pass notifier
+ * callbacks where the call to @invalidate_start is the first
+ * pass and any struct mmu_interval_notifier_finish pointer
+ * returned in the @finish parameter describes the finish pass.
+ * If *@finish is %NULL on return, then no final pass will be
+ * called, and @invalidate_start needs to implement the full
+ * notifier, behaving like @invalidate. The value of *@finish
+ * is guaranteed to be %NULL at function entry.
+ * @invalidate_finish: Called as the second pass for any notifier that returned
+ * a non-NULL *@finish from @invalidate_start. The @finish
+ * pointer passed here is the same one returned by
+ * @invalidate_start.
*/
struct mmu_interval_notifier_ops {
bool (*invalidate)(struct mmu_interval_notifier *interval_sub,
const struct mmu_notifier_range *range,
unsigned long cur_seq);
+ bool (*invalidate_start)(struct mmu_interval_notifier *interval_sub,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq,
+ struct mmu_interval_notifier_finish **finish);
+ void (*invalidate_finish)(struct mmu_interval_notifier_finish *finish);
};
struct mmu_interval_notifier {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3e51190a55e4..841b40031833 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -260,6 +260,8 @@ enum node_stat_item {
#endif
NR_BALLOON_PAGES,
NR_KERNEL_FILE_PAGES,
+ NR_GPU_ACTIVE, /* Pages assigned to GPU objects */
+ NR_GPU_RECLAIM, /* Pages in shrinkable GPU pools */
NR_VM_NODE_STAT_ITEMS
};