From 6703cb3dced01f32982b9f7069ef1336d0225077 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Mon, 24 Mar 2025 16:56:48 +0900 Subject: RDMA/rxe: Enable ODP in RDMA FLUSH operation For persistent memories, add rxe_odp_flush_pmem_iova() so that ODP specific steps are executed. Otherwise, no additional consideration is required. Signed-off-by: Daisuke Matsuda Link: https://patch.msgid.link/20250324075649.3313968-2-matsuda-daisuke@fujitsu.com Reviewed-by: Li Zhijian Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d42eae69d9a8..41bf98ccb275 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -325,6 +325,7 @@ enum ib_odp_transport_cap_bits { IB_ODP_SUPPORT_READ = 1 << 3, IB_ODP_SUPPORT_ATOMIC = 1 << 4, IB_ODP_SUPPORT_SRQ_RECV = 1 << 5, + IB_ODP_SUPPORT_FLUSH = 1 << 6, }; struct ib_odp_caps { -- cgit v1.2.3 From b84001ad0ceeb34bc3fd6c383f197326d4fe8353 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Mon, 24 Mar 2025 16:56:49 +0900 Subject: RDMA/rxe: Enable ODP in ATOMIC WRITE operation Add rxe_odp_do_atomic_write() so that ODP specific steps are applied to ATOMIC WRITE requests. Signed-off-by: Daisuke Matsuda Link: https://patch.msgid.link/20250324075649.3313968-3-matsuda-daisuke@fujitsu.com Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 41bf98ccb275..0a7ccd08b365 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -326,6 +326,7 @@ enum ib_odp_transport_cap_bits { IB_ODP_SUPPORT_ATOMIC = 1 << 4, IB_ODP_SUPPORT_SRQ_RECV = 1 << 5, IB_ODP_SUPPORT_FLUSH = 1 << 6, + IB_ODP_SUPPORT_ATOMIC_WRITE = 1 << 7, }; struct ib_odp_caps { -- cgit v1.2.3 From 8f49682d94f3a12a6a3e636a07bbe57c80329d1d Mon Sep 17 00:00:00 2001 From: Konstantin Taranov Date: Mon, 14 Apr 2025 02:00:33 -0700 Subject: RDMA/mana_ib: support of the zero based MRs Add IB_ZERO_BASED to the valid flags and use the corresponding MR creation request for the zero based memory. 
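As a rough illustration of what the new request type means for a provider, a creation request might be filled in roughly as follows when the verbs consumer registered the region with IB_ZERO_BASED. This is a hedged sketch, not the driver's actual code, and it assumes struct gdma_create_mr_params carries an mr_type field next to the gva/zbva union added in the hunk below:

static void example_fill_zbva(struct gdma_create_mr_params *params,
			      u64 dma_region_handle,
			      enum gdma_mr_access_flags access,
			      int ib_access_flags)
{
	if (ib_access_flags & IB_ZERO_BASED) {
		/* Addresses in work requests are offsets from byte 0 of the MR */
		params->mr_type = GDMA_MR_TYPE_ZBVA;
		params->zbva.dma_region_handle = dma_region_handle;
		params->zbva.access_flags = access;
	}
	/* otherwise the existing GDMA_MR_TYPE_GVA path is used unchanged */
}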
Signed-off-by: Konstantin Taranov Link: https://patch.msgid.link/1744621234-26114-3-git-send-email-kotaranov@linux.microsoft.com Reviewed-by: Long Li Signed-off-by: Leon Romanovsky --- include/net/mana/gdma.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 228603bf03f2..239a70032550 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -815,6 +815,8 @@ enum gdma_mr_type { * address that is set up in the MST */ GDMA_MR_TYPE_GVA = 2, + /* Guest zero-based address MRs */ + GDMA_MR_TYPE_ZBVA = 4, }; struct gdma_create_mr_params { @@ -826,6 +828,10 @@ struct gdma_create_mr_params { u64 virtual_address; enum gdma_mr_access_flags access_flags; } gva; + struct { + u64 dma_region_handle; + enum gdma_mr_access_flags access_flags; + } zbva; }; }; @@ -841,7 +847,10 @@ struct gdma_create_mr_request { u64 virtual_address; enum gdma_mr_access_flags access_flags; } gva; - + struct { + u64 dma_region_handle; + enum gdma_mr_access_flags access_flags; + } zbva; }; u32 reserved_2; };/* HW DATA */ -- cgit v1.2.3 From f1652d76f4c51b5aefd14706eecbd70f05ca987a Mon Sep 17 00:00:00 2001 From: Konstantin Taranov Date: Mon, 14 Apr 2025 02:00:34 -0700 Subject: RDMA/mana_ib: Add support of 4M, 1G, and 2G pages Check the PF capability flag to determine whether the 4M, 1G, and 2G pages are supported. Add these page sizes to mana_ib, if supported. Define the possible page sizes in enum gdma_page_type and remove the unused enum atb_page_size. Signed-off-by: Konstantin Taranov Link: https://patch.msgid.link/1744621234-26114-4-git-send-email-kotaranov@linux.microsoft.com Reviewed-by: Long Li Signed-off-by: Leon Romanovsky --- include/net/mana/gdma.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 239a70032550..ffa9820f14ba 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -407,6 +407,8 @@ struct gdma_context { /* Azure RDMA adapter */ struct gdma_dev mana_ib; + + u64 pf_cap_flags1; }; static inline bool mana_gd_is_mana(struct gdma_dev *gd) @@ -553,6 +555,7 @@ enum { */ #define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2) #define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3) +#define GDMA_DRV_CAP_FLAG_1_GDMA_PAGES_4MB_1GB_2GB BIT(4) #define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5) /* Driver can handle holes (zeros) in the device list */ @@ -707,20 +710,6 @@ struct gdma_query_hwc_timeout_resp { u32 reserved; }; -enum atb_page_size { - ATB_PAGE_SIZE_4K, - ATB_PAGE_SIZE_8K, - ATB_PAGE_SIZE_16K, - ATB_PAGE_SIZE_32K, - ATB_PAGE_SIZE_64K, - ATB_PAGE_SIZE_128K, - ATB_PAGE_SIZE_256K, - ATB_PAGE_SIZE_512K, - ATB_PAGE_SIZE_1M, - ATB_PAGE_SIZE_2M, - ATB_PAGE_SIZE_MAX, -}; - enum gdma_mr_access_flags { GDMA_ACCESS_FLAG_LOCAL_READ = BIT_ULL(0), GDMA_ACCESS_FLAG_LOCAL_WRITE = BIT_ULL(1), -- cgit v1.2.3 From 04039390cc3cb3d1e6ce6bb1680cbdfe117d6473 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 18 Apr 2025 17:58:48 +0100 Subject: RDMA/cma: Remove unused rdma_res_to_id The last use of rdma_res_to_id() was removed in 2020 by commit 211cd9459fda ("RDMA: Add dedicated CM_ID resource tracker function"). Remove it. Signed-off-by: Dr.
David Alan Gilbert Link: https://patch.msgid.link/20250418165848.241305-1-linux@treblig.org Signed-off-by: Leon Romanovsky --- include/rdma/rdma_cm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 8a8ab2f793ab..d1593ad47e28 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -388,6 +388,5 @@ void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid, union ib_gid *dgid); struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *cm_id); -struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res); #endif /* RDMA_CM_H */ -- cgit v1.2.3 From 685f9537a72877693a1ab116d155acc89562c29b Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Fri, 18 Apr 2025 14:13:45 +0900 Subject: RDMA/core: Move ODP capability definitions to uapi The bits are used from both kernel space and userland, so they should be placed in UAPI. Signed-off-by: Daisuke Matsuda Link: https://patch.msgid.link/20250418051345.1022339-2-matsuda-daisuke@fujitsu.com Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 20 ++++++++++---------- include/uapi/rdma/ib_user_verbs.h | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0a7ccd08b365..b06a0ed81bdd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -314,19 +314,19 @@ enum ib_atomic_cap { }; enum ib_odp_general_cap_bits { - IB_ODP_SUPPORT = 1 << 0, - IB_ODP_SUPPORT_IMPLICIT = 1 << 1, + IB_ODP_SUPPORT = IB_UVERBS_ODP_SUPPORT, + IB_ODP_SUPPORT_IMPLICIT = IB_UVERBS_ODP_SUPPORT_IMPLICIT, }; enum ib_odp_transport_cap_bits { - IB_ODP_SUPPORT_SEND = 1 << 0, - IB_ODP_SUPPORT_RECV = 1 << 1, - IB_ODP_SUPPORT_WRITE = 1 << 2, - IB_ODP_SUPPORT_READ = 1 << 3, - IB_ODP_SUPPORT_ATOMIC = 1 << 4, - IB_ODP_SUPPORT_SRQ_RECV = 1 << 5, - IB_ODP_SUPPORT_FLUSH = 1 << 6, - IB_ODP_SUPPORT_ATOMIC_WRITE = 1 << 7, + IB_ODP_SUPPORT_SEND = IB_UVERBS_ODP_SUPPORT_SEND, + IB_ODP_SUPPORT_RECV = IB_UVERBS_ODP_SUPPORT_RECV, + IB_ODP_SUPPORT_WRITE = IB_UVERBS_ODP_SUPPORT_WRITE, + IB_ODP_SUPPORT_READ = IB_UVERBS_ODP_SUPPORT_READ, + IB_ODP_SUPPORT_ATOMIC = IB_UVERBS_ODP_SUPPORT_ATOMIC, + IB_ODP_SUPPORT_SRQ_RECV = IB_UVERBS_ODP_SUPPORT_SRQ_RECV, + IB_ODP_SUPPORT_FLUSH = IB_UVERBS_ODP_SUPPORT_FLUSH, + IB_ODP_SUPPORT_ATOMIC_WRITE = IB_UVERBS_ODP_SUPPORT_ATOMIC_WRITE, }; struct ib_odp_caps { diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index e16650f0c85d..3b7bd99813e9 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -233,6 +233,22 @@ struct ib_uverbs_ex_query_device { __u32 reserved; }; +enum ib_uverbs_odp_general_cap_bits { + IB_UVERBS_ODP_SUPPORT = 1 << 0, + IB_UVERBS_ODP_SUPPORT_IMPLICIT = 1 << 1, +}; + +enum ib_uverbs_odp_transport_cap_bits { + IB_UVERBS_ODP_SUPPORT_SEND = 1 << 0, + IB_UVERBS_ODP_SUPPORT_RECV = 1 << 1, + IB_UVERBS_ODP_SUPPORT_WRITE = 1 << 2, + IB_UVERBS_ODP_SUPPORT_READ = 1 << 3, + IB_UVERBS_ODP_SUPPORT_ATOMIC = 1 << 4, + IB_UVERBS_ODP_SUPPORT_SRQ_RECV = 1 << 5, + IB_UVERBS_ODP_SUPPORT_FLUSH = 1 << 6, + IB_UVERBS_ODP_SUPPORT_ATOMIC_WRITE = 1 << 7, +}; + struct ib_uverbs_odp_caps { __aligned_u64 general_caps; struct { -- cgit v1.2.3 From 5d2ea5aebbb2f3ebde4403f9c55b2b057e5dd2d6 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Mon, 28 Apr 2025 14:34:07 +0300 Subject: RDMA/mlx5: Fix error flow upon firmware failure for RQ destruction Upon RQ destruction if the firmware command fails which 
is the last resource to be destroyed, some SW resources were already cleaned up regardless of the failure. Now properly roll back the object to its original state upon such a failure, in order to avoid a use-after-free if someone tries to destroy the object again, which resulted in the following kernel trace: refcount_t: underflow; use-after-free. WARNING: CPU: 0 PID: 37589 at lib/refcount.c:28 refcount_warn_saturate+0xf4/0x148 Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) rfkill mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) psample mlxfw(OE) mlx_compat(OE) macsec tls pci_hyperv_intf sunrpc vfat fat virtio_net net_failover failover fuse loop nfnetlink vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vmw_vmci vsock xfs crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce virtio_console virtio_gpu virtio_blk virtio_dma_buf virtio_mmio dm_mirror dm_region_hash dm_log dm_mod xpmem(OE) CPU: 0 UID: 0 PID: 37589 Comm: python3 Kdump: loaded Tainted: G OE ------- --- 6.12.0-54.el10.aarch64 #1 Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : refcount_warn_saturate+0xf4/0x148 lr : refcount_warn_saturate+0xf4/0x148 sp : ffff80008b81b7e0 x29: ffff80008b81b7e0 x28: ffff000133d51600 x27: 0000000000000001 x26: 0000000000000000 x25: 00000000ffffffea x24: ffff00010ae80f00 x23: ffff00010ae80f80 x22: ffff0000c66e5d08 x21: 0000000000000000 x20: ffff0000c66e0000 x19: ffff00010ae80340 x18: 0000000000000006 x17: 0000000000000000 x16: 0000000000000020 x15: ffff80008b81b37f x14: 0000000000000000 x13: 2e656572662d7265 x12: ffff80008283ef78 x11: ffff80008257efd0 x10: ffff80008283efd0 x9 : ffff80008021ed90 x8 : 0000000000000001 x7 : 00000000000bffe8 x6 : c0000000ffff7fff x5 : ffff0001fb8e3408 x4 : 0000000000000000 x3 : ffff800179993000 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000133d51600 Call trace: refcount_warn_saturate+0xf4/0x148 mlx5_core_put_rsc+0x88/0xa0 [mlx5_ib] mlx5_core_destroy_rq_tracked+0x64/0x98 [mlx5_ib] mlx5_ib_destroy_wq+0x34/0x80 [mlx5_ib] ib_destroy_wq_user+0x30/0xc0 [ib_core] uverbs_free_wq+0x28/0x58 [ib_uverbs] destroy_hw_idr_uobject+0x34/0x78 [ib_uverbs] uverbs_destroy_uobject+0x48/0x240 [ib_uverbs] __uverbs_cleanup_ufile+0xd4/0x1a8 [ib_uverbs] uverbs_destroy_ufile_hw+0x48/0x120 [ib_uverbs] ib_uverbs_close+0x2c/0x100 [ib_uverbs] __fput+0xd8/0x2f0 __fput_sync+0x50/0x70 __arm64_sys_close+0x40/0x90 invoke_syscall.constprop.0+0x74/0xd0 do_el0_svc+0x48/0xe8 el0_svc+0x44/0x1d0 el0t_64_sync_handler+0x120/0x130 el0t_64_sync+0x1a4/0x1a8 Fixes: e2013b212f9f ("net/mlx5_core: Add RQ and SQ event handling") Signed-off-by: Patrisious Haddad Link: https://patch.msgid.link/3181433ccdd695c63560eeeb3f0c990961732101.1745839855.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d1dfbad9a447..e6ba8f4f4bd1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -398,6 +398,7 @@ struct mlx5_core_rsc_common { enum mlx5_res_type res; refcount_t refcount; struct completion free; + bool invalid; }; struct mlx5_uars_page { -- cgit v1.2.3 From 285e871884ff3dc31c0c2c1a87f0018481bc8471 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 28 Apr 2025 12:22:16 +0300 Subject: mm/hmm: let users to tag specific PFN with
DMA mapped bit Introduce a new sticky flag (HMM_PFN_DMA_MAPPED), which isn't overwritten by HMM range fault. Such a flag allows users to tag specific PFNs with the information that a given PFN was already DMA mapped. Tested-by: Jens Axboe Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- include/linux/hmm.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 126a36571667..a43e56f273a1 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -23,6 +23,8 @@ struct mmu_interval_notifier; * HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID) * HMM_PFN_ERROR - accessing the pfn is impossible and the device should * fail. ie poisoned memory, special pages, no vma, etc + * HMM_PFN_DMA_MAPPED - Flag preserved on input-to-output transformation + * to mark that page is already DMA mapped * * On input: * 0 - Return the current state of the page, do not fault it. @@ -36,13 +38,19 @@ enum hmm_pfn_flags { HMM_PFN_VALID = 1UL << (BITS_PER_LONG - 1), HMM_PFN_WRITE = 1UL << (BITS_PER_LONG - 2), HMM_PFN_ERROR = 1UL << (BITS_PER_LONG - 3), - HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 8), + /* + * Sticky flags, carried from input to output, + * don't forget to update HMM_PFN_INOUT_FLAGS + */ + HMM_PFN_DMA_MAPPED = 1UL << (BITS_PER_LONG - 4), + + HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 9), /* Input flags */ HMM_PFN_REQ_FAULT = HMM_PFN_VALID, HMM_PFN_REQ_WRITE = HMM_PFN_WRITE, - HMM_PFN_FLAGS = 0xFFUL << HMM_PFN_ORDER_SHIFT, + HMM_PFN_FLAGS = ~((1UL << HMM_PFN_ORDER_SHIFT) - 1), }; /* @@ -57,6 +65,14 @@ static inline struct page *hmm_pfn_to_page(unsigned long hmm_pfn) return pfn_to_page(hmm_pfn & ~HMM_PFN_FLAGS); } +/* + * hmm_pfn_to_phys() - return physical address pointed to by a device entry + */ +static inline phys_addr_t hmm_pfn_to_phys(unsigned long hmm_pfn) +{ + return __pfn_to_phys(hmm_pfn & ~HMM_PFN_FLAGS); +} + /* * hmm_pfn_to_map_order() - return the CPU mapping size order * -- cgit v1.2.3 From 8cad47130566123b2c70ef2aa53be02ef1aee5e5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 28 Apr 2025 12:22:17 +0300 Subject: mm/hmm: provide generic DMA managing logic HMM callers use a PFN list to populate the range while calling hmm_range_fault(); the conversion from PFN to DMA address is done by the callers with the help of another DMA list. However, this is wasteful on any modern platform, and with the right logic that DMA list can be avoided. Provide generic logic to manage these lists and give an interface to map/unmap PFNs to DMA addresses, without requiring the callers to be experts in the DMA core API.
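A minimal usage sketch of that interface, based only on the declarations added in hmm-dma.h below; the DMA_MAPPING_ERROR convention and the PAGE_SIZE entry size are assumptions, and mmu-notifier locking and retry handling are omitted:

static int example_fault_and_map(struct device *dev, struct hmm_range *range,
				 size_t npages)
{
	struct hmm_dma_map map;
	struct pci_p2pdma_map_state p2pdma = {};
	size_t i;
	int ret;

	/* One allocation provides both the PFN list and the DMA list */
	ret = hmm_dma_map_alloc(dev, &map, npages, PAGE_SIZE);
	if (ret)
		return ret;

	range->hmm_pfns = map.pfn_list;
	ret = hmm_range_fault(range);
	if (ret)
		goto err_free;

	for (i = 0; i < npages; i++) {
		dma_addr_t dma = hmm_dma_map_pfn(dev, &map, i, &p2pdma);

		if (dma_mapping_error(dev, dma)) {
			ret = -EFAULT;
			goto err_unmap;
		}
	}
	return 0;	/* map.dma_list[] now holds the device addresses */

err_unmap:
	while (i--)
		hmm_dma_unmap_pfn(dev, &map, i);
err_free:
	hmm_dma_map_free(dev, &map);
	return ret;
}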
Tested-by: Jens Axboe Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- include/linux/hmm-dma.h | 33 +++++++++++++++++++++++++++++++++ include/linux/hmm.h | 6 +++++- 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 include/linux/hmm-dma.h (limited to 'include') diff --git a/include/linux/hmm-dma.h b/include/linux/hmm-dma.h new file mode 100644 index 000000000000..f58b9fc71999 --- /dev/null +++ b/include/linux/hmm-dma.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ +#ifndef LINUX_HMM_DMA_H +#define LINUX_HMM_DMA_H + +#include + +struct dma_iova_state; +struct pci_p2pdma_map_state; + +/* + * struct hmm_dma_map - array of PFNs and DMA addresses + * + * @state: DMA IOVA state + * @pfns: array of PFNs + * @dma_list: array of DMA addresses + * @dma_entry_size: size of each DMA entry in the array + */ +struct hmm_dma_map { + struct dma_iova_state state; + unsigned long *pfn_list; + dma_addr_t *dma_list; + size_t dma_entry_size; +}; + +int hmm_dma_map_alloc(struct device *dev, struct hmm_dma_map *map, + size_t nr_entries, size_t dma_entry_size); +void hmm_dma_map_free(struct device *dev, struct hmm_dma_map *map); +dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map, + size_t idx, + struct pci_p2pdma_map_state *p2pdma_state); +bool hmm_dma_unmap_pfn(struct device *dev, struct hmm_dma_map *map, size_t idx); +#endif /* LINUX_HMM_DMA_H */ diff --git a/include/linux/hmm.h b/include/linux/hmm.h index a43e56f273a1..db75ffc949a7 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -23,6 +23,8 @@ struct mmu_interval_notifier; * HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID) * HMM_PFN_ERROR - accessing the pfn is impossible and the device should * fail. ie poisoned memory, special pages, no vma, etc + * HMM_PFN_P2PDMA - P2P page + * HMM_PFN_P2PDMA_BUS - Bus mapped P2P transfer * HMM_PFN_DMA_MAPPED - Flag preserved on input-to-output transformation * to mark that page is already DMA mapped * @@ -43,8 +45,10 @@ enum hmm_pfn_flags { * don't forget to update HMM_PFN_INOUT_FLAGS */ HMM_PFN_DMA_MAPPED = 1UL << (BITS_PER_LONG - 4), + HMM_PFN_P2PDMA = 1UL << (BITS_PER_LONG - 5), + HMM_PFN_P2PDMA_BUS = 1UL << (BITS_PER_LONG - 6), - HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 9), + HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 11), /* Input flags */ HMM_PFN_REQ_FAULT = HMM_PFN_VALID, -- cgit v1.2.3 From eedd5b1276e76d6b260a7a77a149ef5155aa76f0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 28 Apr 2025 12:22:18 +0300 Subject: RDMA/umem: Store ODP access mask information in PFN As a preparation to remove dma_list, store access mask in PFN pointer and not in dma_addr_t. Tested-by: Jens Axboe Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- include/rdma/ib_umem_odp.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 0844c1d05ac6..a345c26a745d 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -8,6 +8,7 @@ #include #include +#include struct ib_umem_odp { struct ib_umem umem; @@ -67,19 +68,6 @@ static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp) umem_odp->page_shift; } -/* - * The lower 2 bits of the DMA address signal the R/W permissions for - * the entry. To upgrade the permissions, provide the appropriate - * bitmask to the map_dma_pages function. 
- * - * Be aware that upgrading a mapped address might result in change of - * the DMA address for the page. - */ -#define ODP_READ_ALLOWED_BIT (1<<0ULL) -#define ODP_WRITE_ALLOWED_BIT (1<<1ULL) - -#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) - #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_umem_odp * -- cgit v1.2.3 From 1efe8c0670d6a6883faa09c9abc746c741f5664a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 28 Apr 2025 12:22:19 +0300 Subject: RDMA/core: Convert UMEM ODP DMA mapping to caching IOVA and page linkage Reuse newly added DMA API to cache IOVA and only link/unlink pages in fast path for UMEM ODP flow. Tested-by: Jens Axboe Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- include/rdma/ib_umem_odp.h | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index a345c26a745d..2a24bf791c10 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -8,24 +8,17 @@ #include #include -#include +#include struct ib_umem_odp { struct ib_umem umem; struct mmu_interval_notifier notifier; struct pid *tgid; - /* An array of the pfns included in the on-demand paging umem. */ - unsigned long *pfn_list; + struct hmm_dma_map map; /* - * An array with DMA addresses mapped for pfns in pfn_list. - * The lower two bits designate access permissions. - * See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT. - */ - dma_addr_t *dma_list; - /* - * The umem_mutex protects the page_list and dma_list fields of an ODP + * The umem_mutex protects the page_list field of an ODP * umem, allowing only a single thread to map/unmap pages. The mutex * also protects access to the mmu notifier counters. */ -- cgit v1.2.3 From ced82fce77e93315239f54caebbc88e263078e31 Mon Sep 17 00:00:00 2001 From: Konstantin Taranov Date: Wed, 7 May 2025 08:59:02 -0700 Subject: net: mana: Probe rdma device in mana driver Initialize gdma device for rdma inside mana module. For each gdma device, initialize an auxiliary ib device. Signed-off-by: Konstantin Taranov Link: https://patch.msgid.link/1746633545-17653-2-git-send-email-kotaranov@linux.microsoft.com Reviewed-by: Long Li Signed-off-by: Leon Romanovsky --- include/net/mana/mana.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 0f78065de8fe..5857efc885a6 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -488,6 +488,9 @@ int mana_detach(struct net_device *ndev, bool from_close); int mana_probe(struct gdma_dev *gd, bool resuming); void mana_remove(struct gdma_dev *gd, bool suspending); +int mana_rdma_probe(struct gdma_dev *gd); +void mana_rdma_remove(struct gdma_dev *gd); + void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev); int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames, u32 flags); -- cgit v1.2.3 From 505cc26bcae00699bacaee66cd50ede7a9cc89cb Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 7 May 2025 08:59:05 -0700 Subject: net: mana: Add support for auxiliary device servicing events Handle soc servicing events which require the rdma auxiliary device resources to be cleaned up during a suspend, and re-initialized during a resume. 
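A hedged sketch of how the suspend/resume service types declared in the following hunk could be dispatched from a queued mana_service_work item; the real handler lives in the mana Ethernet driver and may differ in detail:

static void example_service_func(struct work_struct *work)
{
	struct mana_service_work *svc =
		container_of(work, struct mana_service_work, work);
	struct gdma_dev *gd = svc->gdma_dev;

	switch (svc->event) {
	case GDMA_SERVICE_TYPE_RDMA_SUSPEND:
		if (!gd->is_suspended) {
			mana_rdma_remove(gd);	/* tear down the IB auxiliary device */
			gd->is_suspended = true;
		}
		break;
	case GDMA_SERVICE_TYPE_RDMA_RESUME:
		if (gd->is_suspended) {
			mana_rdma_probe(gd);	/* re-create the IB auxiliary device */
			gd->is_suspended = false;
		}
		break;
	default:
		break;
	}
	kfree(svc);
}

Such work items would presumably be queued on the service_wq workqueue added to struct gdma_context in the same hunk.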
Signed-off-by: Shiraz Saleem Signed-off-by: Konstantin Taranov Link: https://patch.msgid.link/1746633545-17653-5-git-send-email-kotaranov@linux.microsoft.com Signed-off-by: Leon Romanovsky --- include/net/mana/gdma.h | 19 +++++++++++++++++++ include/net/mana/hw_channel.h | 9 +++++++++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index ffa9820f14ba..3ce56a816425 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -60,6 +60,7 @@ enum gdma_eqe_type { GDMA_EQE_HWC_INIT_DONE = 131, GDMA_EQE_HWC_SOC_RECONFIG = 132, GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133, + GDMA_EQE_HWC_SOC_SERVICE = 134, GDMA_EQE_RNIC_QP_FATAL = 176, }; @@ -70,6 +71,18 @@ enum { GDMA_DEVICE_MANA_IB = 3, }; +enum gdma_service_type { + GDMA_SERVICE_TYPE_NONE = 0, + GDMA_SERVICE_TYPE_RDMA_SUSPEND = 1, + GDMA_SERVICE_TYPE_RDMA_RESUME = 2, +}; + +struct mana_service_work { + struct work_struct work; + struct gdma_dev *gdma_dev; + enum gdma_service_type event; +}; + struct gdma_resource { /* Protect the bitmap */ spinlock_t lock; @@ -224,6 +237,8 @@ struct gdma_dev { void *driver_data; struct auxiliary_device *adev; + bool is_suspended; + bool rdma_teardown; }; /* MANA_PAGE_SIZE is the DMA unit */ @@ -409,6 +424,8 @@ struct gdma_context { struct gdma_dev mana_ib; u64 pf_cap_flags1; + + struct workqueue_struct *service_wq; }; static inline bool mana_gd_is_mana(struct gdma_dev *gd) @@ -891,4 +908,6 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle); void mana_register_debugfs(void); void mana_unregister_debugfs(void); +int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event); + #endif /* _GDMA_H */ diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h index 158b125692c2..83cf93338eb3 100644 --- a/include/net/mana/hw_channel.h +++ b/include/net/mana/hw_channel.h @@ -49,6 +49,15 @@ union hwc_init_type_data { }; }; /* HW DATA */ +union hwc_init_soc_service_type { + u32 as_uint32; + + struct { + u32 value : 28; + u32 type : 4; + }; +}; /* HW DATA */ + struct hwc_rx_oob { u32 type : 6; u32 eom : 1; -- cgit v1.2.3 From 45611fe821af0e09cb9dc2dfd2418b2e1e1831f3 Mon Sep 17 00:00:00 2001 From: Vlad Dumitrescu Date: Wed, 21 May 2025 14:34:17 +0300 Subject: IB/cm: Remove dead code and adjust naming Drop ib_send_cm_mra parameters which are always constant. Remove branch which is never taken. Adjust name to ib_prepare_cm_mra, which better reflects its functionality - no MRA is actually sent. Adjust name of related tracepoints. Push setting of the constant service timeout to cm.c and drop IB_CM_MRA_FLAG_DELAY. Signed-off-by: Vlad Dumitrescu Reviewed-by: Sean Hefty Link: https://patch.msgid.link/cdd2a237acf2b495c19ce02e4b1c42c41c6751c2.1747827207.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_cm.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index a2ac62b4a6cf..1fa3786f82f4 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -480,23 +480,12 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len); -#define IB_CM_MRA_FLAG_DELAY 0x80 /* Send MRA only after a duplicate msg */ - /** - * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection - * message. + * ib_prepare_cm_mra - Prepares to send a message receipt acknowledgment to a + connection message in case duplicates are received. 
* @cm_id: Connection identifier associated with the connection message. - * @service_timeout: The lower 5-bits specify the maximum time required for - * the sender to reply to the connection message. The upper 3-bits - * specify additional control flags. - * @private_data: Optional user-defined private data sent with the - * message receipt acknowledgement. - * @private_data_len: Size of the private data buffer, in bytes. */ -int ib_send_cm_mra(struct ib_cm_id *cm_id, - u8 service_timeout, - const void *private_data, - u8 private_data_len); +int ib_prepare_cm_mra(struct ib_cm_id *cm_id); /** * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning -- cgit v1.2.3
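For CM consumers the change reduces to dropping the constant arguments. A hedged before/after sketch of a caller (EXAMPLE_MRA_TIMEOUT is hypothetical, standing in for whatever constant each caller used to pass):

static void example_on_connect_request(struct ib_cm_id *cm_id)
{
	/* Before: the caller supplied a timeout and unused private data */
	/* ib_send_cm_mra(cm_id, EXAMPLE_MRA_TIMEOUT, NULL, 0); */

	/*
	 * After: cm.c applies the constant service timeout internally and
	 * only emits the MRA if a duplicate connection message arrives.
	 */
	ib_prepare_cm_mra(cm_id);
}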