summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
diff options
context:
space:
mode:
authorChang, Bruce <yu.bruce.chang@intel.com>2023-04-03 22:20:31 +0000
committerRodrigo Vivi <rodrigo.vivi@intel.com>2023-12-19 18:31:30 -0500
commit1a545ed74b33eaf6dee6d4159be07819ad89a569 (patch)
tree84f0e2ed249b7b1d9588ebf5af1d3a3b2a43b5ef /drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
parent96578d106b30dc3a6550624477a092d793052660 (diff)
drm/xe: fix pvc unload issue
Currently, unloading the PVC driver triggers a NULL pointer dereference. The call stack is as below.

[ 4850.618000] Call Trace:
[ 4850.620740] <TASK>
[ 4850.623134] ttm_bo_cleanup_memtype_use+0x3f/0x50 [ttm]
[ 4850.628661] ttm_bo_release+0x154/0x2c0 [ttm]
[ 4850.633317] ? drm_buddy_fini+0x62/0x80 [drm_buddy]
[ 4850.638487] ? __kmem_cache_free+0x27d/0x2c0
[ 4850.643054] ttm_bo_put+0x38/0x60 [ttm]
[ 4850.647190] xe_gem_object_free+0x1f/0x30 [xe]
[ 4850.651945] drm_gem_object_free+0x1e/0x30 [drm]
[ 4850.656904] ggtt_fini_noalloc+0x9d/0xe0 [xe]
[ 4850.661574] drm_managed_release+0xb5/0x150 [drm]
[ 4850.666617] drm_dev_release+0x30/0x50 [drm]
[ 4850.671209] devm_drm_dev_init_release+0x3c/0x60 [drm]

There are a couple of issues, but the main one is that TTM has only one TTM_PL_TT region, while PVC has 2 tiles and tries to set up one TTM_PL_TT manager per tile, so the second one overwrites the first. At unload time, the first tile resets the TTM_PL_TT manager; when the second tile then tries to free a BO, it hits the NULL dereference because the TTM manager has already been reset to 0.

The fix is to use one global TTM_PL_TT manager.

v2: make gtt mgr global and change the name to sys_mgr

Cc: Stuart Summers <stuart.summers@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Vivi, Rodrigo <rodrigo.vivi@intel.com>
Signed-off-by: Bruce Chang <yu.bruce.chang@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Diffstat (limited to 'drivers/gpu/drm/xe/xe_ttm_sys_mgr.c')
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_sys_mgr.c115
1 files changed, 115 insertions, 0 deletions
diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
new file mode 100644
index 000000000000..5b0674bbb8ed
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021-2022 Intel Corporation
+ * Copyright (C) 2021-2002 Red Hat
+ */
+
+#include "xe_ttm_sys_mgr.h"
+
+#include <drm/drm_managed.h>
+
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+
+/*
+ * Per-allocation node handed out by this manager: a TTM range-manager node
+ * together with the buffer object it was allocated for.
+ */
+struct xe_ttm_sys_node {
+ /* Buffer object passed to the allocation callback. */
+ struct ttm_buffer_object *tbo;
+ /*
+  * Embedded range-manager node. The allocation is sized with
+  * struct_size() over base.mm_nodes, so this must remain the last member.
+  */
+ struct ttm_range_mgr_node base;
+};
+
+/* Map a TTM resource back to the xe_ttm_sys_node that embeds it. */
+static inline struct xe_ttm_sys_node *
+to_xe_ttm_sys_node(struct ttm_resource *res)
+{
+	/* Step outwards one level at a time: resource -> range node -> sys node. */
+	struct ttm_range_mgr_node *range_node =
+		container_of(res, struct ttm_range_mgr_node, base);
+
+	return container_of(range_node, struct xe_ttm_sys_node, base);
+}
+
+/*
+ * Allocation callback of the system-memory manager.
+ *
+ * Allocates a node for @tbo, initialises the embedded TTM resource for @place
+ * and, on success, hands the resource back through @res.
+ *
+ * Returns 0 on success, -ENOMEM if the node cannot be allocated, or -ENOSPC
+ * if a non-temporary placement pushes the manager usage above its size.
+ */
+static int xe_ttm_sys_mgr_new(struct ttm_resource_manager *man,
+			      struct ttm_buffer_object *tbo,
+			      const struct ttm_place *place,
+			      struct ttm_resource **res)
+{
+	struct xe_ttm_sys_node *sys_node;
+	struct ttm_resource *new_res;
+
+	/* One trailing mm_nodes[] entry is enough for this manager. */
+	sys_node = kzalloc(struct_size(sys_node, base.mm_nodes, 1), GFP_KERNEL);
+	if (!sys_node)
+		return -ENOMEM;
+
+	new_res = &sys_node->base.base;
+	sys_node->tbo = tbo;
+	ttm_resource_init(tbo, place, new_res);
+
+	/*
+	 * The limit check runs after the resource has been initialised and is
+	 * skipped entirely for temporary placements.
+	 */
+	if (!(place->flags & TTM_PL_FLAG_TEMPORARY) &&
+	    ttm_resource_manager_usage(man) > (man->size << PAGE_SHIFT)) {
+		ttm_resource_fini(man, new_res);
+		kfree(sys_node);
+		return -ENOSPC;
+	}
+
+	sys_node->base.mm_nodes[0].start = 0;
+	sys_node->base.mm_nodes[0].size = PFN_UP(new_res->size);
+	new_res->start = XE_BO_INVALID_OFFSET;
+
+	*res = new_res;
+	return 0;
+}
+
+/*
+ * Free callback of the system-memory manager: tears down the TTM resource
+ * and releases the node that embeds it.
+ */
+static void xe_ttm_sys_mgr_del(struct ttm_resource_manager *man,
+			       struct ttm_resource *res)
+{
+	ttm_resource_fini(man, res);
+	kfree(to_xe_ttm_sys_node(res));
+}
+
+/* Debug callback of the system-memory manager; prints nothing. */
+static void xe_ttm_sys_mgr_debug(struct ttm_resource_manager *man,
+				 struct drm_printer *printer)
+{
+	/* Intentionally empty. */
+}
+
+/* Callback table installed on the system-memory manager at init time. */
+static const struct ttm_resource_manager_func xe_ttm_sys_mgr_func = {
+	.alloc	= xe_ttm_sys_mgr_new,
+	.free	= xe_ttm_sys_mgr_del,
+	.debug	= xe_ttm_sys_mgr_debug,
+};
+
+/*
+ * Release action registered by xe_ttm_sys_mgr_init(); @arg is the xe_device.
+ *
+ * Marks the manager unused and evicts everything from it, then cleans it up
+ * and unregisters it from the TTM device.
+ */
+static void ttm_sys_mgr_fini(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct ttm_resource_manager *sys_mgr = &xe->mem.sys_mgr;
+
+	ttm_resource_manager_set_used(sys_mgr, false);
+
+	/* On eviction failure the manager is left in place, without cleanup. */
+	if (ttm_resource_manager_evict_all(&xe->ttm, sys_mgr))
+		return;
+
+	ttm_resource_manager_cleanup(sys_mgr);
+	ttm_set_driver_manager(&xe->ttm, XE_PL_TT, NULL);
+}
+
+/**
+ * xe_ttm_sys_mgr_init - set up the device-wide XE_PL_TT resource manager
+ * @xe: xe device that owns the manager (xe->mem.sys_mgr)
+ *
+ * Sizes the manager to three quarters of the memory reported by si_meminfo()
+ * (totalram * mem_unit), registers it with the TTM device as XE_PL_TT and
+ * marks it used. Teardown is registered as a drm-managed release action.
+ *
+ * Return: the result of drmm_add_action_or_reset().
+ */
+int xe_ttm_sys_mgr_init(struct xe_device *xe)
+{
+	struct ttm_resource_manager *sys_mgr = &xe->mem.sys_mgr;
+	struct sysinfo mem_info;
+	u64 limit_bytes;
+
+	si_meminfo(&mem_info);
+	limit_bytes = (u64)mem_info.totalram * mem_info.mem_unit;
+	limit_bytes = limit_bytes * 3 / 4;
+
+	sys_mgr->use_tt = true;
+	sys_mgr->func = &xe_ttm_sys_mgr_func;
+
+	/* The manager size is passed in pages, not bytes. */
+	ttm_resource_manager_init(sys_mgr, &xe->ttm, limit_bytes >> PAGE_SHIFT);
+	ttm_set_driver_manager(&xe->ttm, XE_PL_TT, sys_mgr);
+	ttm_resource_manager_set_used(sys_mgr, true);
+
+	return drmm_add_action_or_reset(&xe->drm, ttm_sys_mgr_fini, xe);
+}