drm/amdkfd: Implement KFD process eviction/restore

When the TTM memory manager in KGD evicts BOs, all user mode queues potentially accessing these BOs must be evicted temporarily. Once user mode queues are evicted, the eviction fence is signaled, allowing the migration of the BO to proceed. A delayed worker is scheduled to restore all the BOs belonging to the evicted process and restart its queues. During suspend/resume of the GPU we also evict all processes to allow KGD to save BOs in system memory, since VRAM will be lost. v2: * Account for eviction when updating of q->is_active in MQD manager Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Acked-by: Oded Gabbay <oded.gabbay@gmail.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
author: Felix Kuehling <Felix.Kuehling@amd.com> 2018-02-06 20:32:45 -0500
committer: Oded Gabbay <oded.gabbay@gmail.com> 2018-02-06 20:32:45 -0500
commit: 26103436da003327017af325483b6150a3b855cc (patch)
tree: e7249d2b7e73e3969067680dd6496945981220f0 /drivers/gpu/drm/amd/amdkfd/kfd_process.c
parent: 403575c44e61722ae443b47df66e188b367d7324 (diff)
1 files changed, 213 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index cf4fa25cc430..18b2b86ad503 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -55,6 +55,9 @@ static struct kfd_process *create_process(const struct task_struct *thread,
 					struct file *filep);
 static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
 
+static void evict_process_worker(struct work_struct *work);
+static void restore_process_worker(struct work_struct *work);
+
 
 void kfd_process_create_wq(void)
 {
@@ -230,6 +233,9 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 	mutex_unlock(&kfd_processes_mutex);
 	synchronize_srcu(&kfd_processes_srcu);
 
+	cancel_delayed_work_sync(&p->eviction_work);
+	cancel_delayed_work_sync(&p->restore_work);
+
 	mutex_lock(&p->mutex);
 
 	/* Iterate over all process device data structures and if the
@@ -351,6 +357,10 @@ static struct kfd_process *create_process(const struct task_struct *thread,
 	if (err != 0)
 		goto err_init_apertures;
 
+	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
+	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
+	process->last_restore_timestamp = get_jiffies_64();
+
 	err = kfd_process_init_cwsr(process, filep);
 	if (err)
 		goto err_init_cwsr;
@@ -402,6 +412,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
 	pdd->qpd.dqm = dev->dqm;
 	pdd->qpd.pqm = &p->pqm;
+	pdd->qpd.evicted = 0;
 	pdd->process = p;
 	pdd->bound = PDD_UNBOUND;
 	pdd->already_dequeued = false;
@@ -490,6 +501,208 @@ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
 	return ret_p;
 }
 
+/* This increments the process->ref counter. */
+struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
+{
+	struct kfd_process *p;
+
+	int idx = srcu_read_lock(&kfd_processes_srcu);
+
+	p = find_process_by_mm(mm);
+	if (p)
+		kref_get(&p->ref);
+
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+
+	return p;
+}
+
+/* process_evict_queues - Evict all user queues of a process
+ *
+ * Eviction is reference-counted per process-device. This means multiple
+ * evictions from different sources can be nested safely.
+ */
+static int process_evict_queues(struct kfd_process *p)
+{
+	struct kfd_process_device *pdd;
+	int r = 0;
+	unsigned int n_evicted = 0;
+
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
+							    &pdd->qpd);
+		if (r) {
+			pr_err("Failed to evict process queues\n");
+			goto fail;
+		}
+		n_evicted++;
+	}
+
+	return r;
+
+fail:
+	/* To keep state consistent, roll back partial eviction by
+	 * restoring queues
+	 */
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+		if (n_evicted == 0)
+			break;
+		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
+							      &pdd->qpd))
+			pr_err("Failed to restore queues\n");
+
+		n_evicted--;
+	}
+
+	return r;
+}
+
+/* process_restore_queues - Restore all user queues of a process */
+static  int process_restore_queues(struct kfd_process *p)
+{
+	struct kfd_process_device *pdd;
+	int r, ret = 0;
+
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
+							      &pdd->qpd);
+		if (r) {
+			pr_err("Failed to restore process queues\n");
+			if (!ret)
+				ret = r;
+		}
+	}
+
+	return ret;
+}
+
+static void evict_process_worker(struct work_struct *work)
+{
+	int ret;
+	struct kfd_process *p;
+	struct delayed_work *dwork;
+
+	dwork = to_delayed_work(work);
+
+	/* Process termination destroys this worker thread. So during the
+	 * lifetime of this thread, kfd_process p will be valid
+	 */
+	p = container_of(dwork, struct kfd_process, eviction_work);
+	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
+		  "Eviction fence mismatch\n");
+
+	/* Narrow window of overlap between restore and evict work
+	 * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
+	 * unreserves KFD BOs, it is possible to evicted again. But
+	 * restore has few more steps of finish. So lets wait for any
+	 * previous restore work to complete
+	 */
+	flush_delayed_work(&p->restore_work);
+
+	pr_debug("Started evicting pasid %d\n", p->pasid);
+	ret = process_evict_queues(p);
+	if (!ret) {
+		dma_fence_signal(p->ef);
+		dma_fence_put(p->ef);
+		p->ef = NULL;
+		schedule_delayed_work(&p->restore_work,
+				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
+
+		pr_debug("Finished evicting pasid %d\n", p->pasid);
+	} else
+		pr_err("Failed to evict queues of pasid %d\n", p->pasid);
+}
+
+static void restore_process_worker(struct work_struct *work)
+{
+	struct delayed_work *dwork;
+	struct kfd_process *p;
+	struct kfd_process_device *pdd;
+	int ret = 0;
+
+	dwork = to_delayed_work(work);
+
+	/* Process termination destroys this worker thread. So during the
+	 * lifetime of this thread, kfd_process p will be valid
+	 */
+	p = container_of(dwork, struct kfd_process, restore_work);
+
+	/* Call restore_process_bos on the first KGD device. This function
+	 * takes care of restoring the whole process including other devices.
+	 * Restore can fail if enough memory is not available. If so,
+	 * reschedule again.
+	 */
+	pdd = list_first_entry(&p->per_device_data,
+			       struct kfd_process_device,
+			       per_device_list);
+
+	pr_debug("Started restoring pasid %d\n", p->pasid);
+
+	/* Setting last_restore_timestamp before successful restoration.
+	 * Otherwise this would have to be set by KGD (restore_process_bos)
+	 * before KFD BOs are unreserved. If not, the process can be evicted
+	 * again before the timestamp is set.
+	 * If restore fails, the timestamp will be set again in the next
+	 * attempt. This would mean that the minimum GPU quanta would be
+	 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
+	 * functions)
+	 */
+
+	p->last_restore_timestamp = get_jiffies_64();
+	ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info,
+						     &p->ef);
+	if (ret) {
+		pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
+			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
+		ret = schedule_delayed_work(&p->restore_work,
+				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
+		WARN(!ret, "reschedule restore work failed\n");
+		return;
+	}
+
+	ret = process_restore_queues(p);
+	if (!ret)
+		pr_debug("Finished restoring pasid %d\n", p->pasid);
+	else
+		pr_err("Failed to restore queues of pasid %d\n", p->pasid);
+}
+
+void kfd_suspend_all_processes(void)
+{
+	struct kfd_process *p;
+	unsigned int temp;
+	int idx = srcu_read_lock(&kfd_processes_srcu);
+
+	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+		cancel_delayed_work_sync(&p->eviction_work);
+		cancel_delayed_work_sync(&p->restore_work);
+
+		if (process_evict_queues(p))
+			pr_err("Failed to suspend process %d\n", p->pasid);
+		dma_fence_signal(p->ef);
+		dma_fence_put(p->ef);
+		p->ef = NULL;
+	}
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+}
+
+int kfd_resume_all_processes(void)
+{
+	struct kfd_process *p;
+	unsigned int temp;
+	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
+
+	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+		if (!schedule_delayed_work(&p->restore_work, 0)) {
+			pr_err("Restore process %d failed during resume\n",
+			       p->pasid);
+			ret = -EFAULT;
+		}
+	}
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+	return ret;
+}
+
 int kfd_reserved_mem_mmap(struct kfd_process *process,
 			  struct vm_area_struct *vma)
 {
author	Felix Kuehling <Felix.Kuehling@amd.com>	2018-02-06 20:32:45 -0500
committer	Oded Gabbay <oded.gabbay@gmail.com>	2018-02-06 20:32:45 -0500
commit	26103436da003327017af325483b6150a3b855cc (patch)
tree	e7249d2b7e73e3969067680dd6496945981220f0 /drivers/gpu/drm/amd/amdkfd/kfd_process.c
parent	403575c44e61722ae443b47df66e188b367d7324 (diff)