From 6f582513ad15de729ee5c91dfef946f3c266a207 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 17 May 2023 16:19:51 -0400 Subject: drm/amdkfd: add event age tracking Add event age tracking Signed-off-by: James Zhu Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 1781e7669982..93f1c0bc5caf 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -320,12 +320,20 @@ struct kfd_hsa_hw_exception_data { __u32 gpu_id; }; +/* hsa signal event data */ +struct kfd_hsa_signal_event_data { + __u64 last_event_age; /* to and from KFD */ +}; + /* Event data */ struct kfd_event_data { union { + /* From KFD */ struct kfd_hsa_memory_exception_data memory_exception_data; struct kfd_hsa_hw_exception_data hw_exception_data; - }; /* From KFD */ + /* To and From KFD */ + struct kfd_hsa_signal_event_data signal_event_data; + }; __u64 kfd_event_data_ext; /* pointer to an extension structure for future exception types */ __u32 event_id; /* to KFD */ -- cgit v1.2.3 From d297eedf83f5af96751c0da1e4355c19244a55a2 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 7 Jun 2023 13:27:40 -0400 Subject: drm/amdkfd: bump kfd ioctl minor version for event age availability Bump the minor version to declare event age tracking feature is now available. In kernel amdgpu driver, kfd_wait_on_events is used to support user space signal event wait function. For multiple threads waiting on same event scenery, race condition could occur since some threads after checking signal condition, before calling kfd_wait_on_events, the event interrupt could be fired and wake up other thread which are sleeping on this event. Then those threads could fall into sleep without waking up again. Adding event age tracking in both kernel and user mode, will help avoiding this race condition. Proposed ROCT-Thunk-Interface: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/commit/efdbf6cfbc026bd68ac3c35d00dacf84370eb81e https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/commit/1820ae0a2db85b6f584611dc0cde1a00e7c22915 Proposed ROCR-Runtime: https://github.com/RadeonOpenCompute/ROCR-Runtime/compare/master...zhums:ROCR-Runtime:new_event_wait_review https://github.com/RadeonOpenCompute/ROCR-Runtime/commit/e1f5bdb88eb882ac798aeca2c00ea3fbb2dba459 https://github.com/RadeonOpenCompute/ROCR-Runtime/commit/7d26afd14107b5c2a754c1a3f415d89f3aabb503 Signed-off-by: James Zhu Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 93f1c0bc5caf..eeb2fdcbdcb7 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -39,9 +39,10 @@ * - 1.11 - Add unified memory for ctx save/restore area * - 1.12 - Add DMA buf export ioctl * - 1.13 - Add debugger API + * - 1.14 - Update kfd_event_data */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 13 +#define KFD_IOCTL_MINOR_VERSION 14 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ -- cgit v1.2.3 From 72f1de49ffb90b29748284f27f1d6b829ab1de95 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Mon, 17 Apr 2023 17:08:12 +0800 Subject: drm/dp_mst: Clear MSG_RDY flag before sending new message [Why] The sequence for collecting down_reply from source perspective should be: Request_n->repeat (get partial reply of Request_n->clear message ready flag to ack DPRX that the message is received) till all partial replies for Request_n are received->new Request_n+1. Now there is chance that drm_dp_mst_hpd_irq() will fire new down request in the tx queue when the down reply is incomplete. Source is restricted to generate interveleaved message transactions so we should avoid it. Also, while assembling partial reply packets, reading out DPCD DOWN_REP Sideband MSG buffer + clearing DOWN_REP_MSG_RDY flag should be wrapped up as a complete operation for reading out a reply packet. Kicking off a new request before clearing DOWN_REP_MSG_RDY flag might be risky. e.g. If the reply of the new request has overwritten the DPRX DOWN_REP Sideband MSG buffer before source writing one to clear DOWN_REP_MSG_RDY flag, source then unintentionally flushes the reply for the new request. Should handle the up request in the same way. [How] Separete drm_dp_mst_hpd_irq() into 2 steps. After acking the MST IRQ event, driver calls drm_dp_mst_hpd_irq_send_new_request() and might trigger drm_dp_mst_kick_tx() only when there is no on going message transaction. Changes since v1: * Reworked on review comments received -> Adjust the fix to let driver explicitly kick off new down request when mst irq event is handled and acked -> Adjust the commit message Changes since v2: * Adjust the commit message * Adjust the naming of the divided 2 functions and add a new input parameter "ack". * Adjust code flow as per review comments. Changes since v3: * Update the function description of drm_dp_mst_hpd_irq_handle_event Changes since v4: * Change ack of drm_dp_mst_hpd_irq_handle_event() to be an array align the size of esi[] Signed-off-by: Wayne Lin Reviewed-by: Lyude Paul Acked-by: Jani Nikula Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- include/drm/display/drm_dp_mst_helper.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index f962e97880b4..ed5c9660563c 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -810,8 +810,11 @@ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr); bool drm_dp_read_mst_cap(struct drm_dp_aux *aux, const u8 dpcd[DP_RECEIVER_CAP_SIZE]); int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool mst_state); -int drm_dp_mst_hpd_irq(struct drm_dp_mst_topology_mgr *mgr, u8 *esi, bool *handled); - +int drm_dp_mst_hpd_irq_handle_event(struct drm_dp_mst_topology_mgr *mgr, + const u8 *esi, + u8 *ack, + bool *handled); +void drm_dp_mst_hpd_irq_send_new_request(struct drm_dp_mst_topology_mgr *mgr); int drm_dp_mst_detect_port(struct drm_connector *connector, -- cgit v1.2.3