author     Matthew Wilcox (Oracle) <willy@infradead.org>   2025-11-10 20:32:01 +0000
committer  Andrew Morton <akpm@linux-foundation.org>       2025-11-20 13:43:59 -0800
commit     2197bb60f89077603cc580ff752c5cf6388c1099 (patch)
tree       b0f927b85b2f299bb5d800c24ed2c3f3170fb604 /include
parent     3a47e8771c43bc9775f667b8de35c873975aa42e (diff)
mm: add vma_start_write_killable()
Patch series "vma_start_write_killable", v2.

When we added the VMA lock, we made a major oversight in not adding a
killable variant.  That can run us into trouble where a thread takes the
VMA lock for read (eg handling a page fault) and then goes out to lunch
for an hour (eg doing reclaim).  Another thread tries to modify the VMA,
taking the mmap_lock for write, then attempts to lock the VMA for write.
That blocks on the first thread, and ensures that every other page fault
now tries to take the mmap_lock for read.  Because everything's in an
uninterruptible sleep, we can't kill the task, which makes me angry.

This patchset just adds vma_start_write_killable() and converts one
caller to use it.  Most users are somewhat tricky to convert, so expect
follow-up individual patches per call-site which need careful analysis
to make sure we've done proper cleanup.

This patch (of 2):

The vma can be held read-locked for a substantial period of time, eg if
memory allocation needs to go into reclaim.  It's useful to be able to
send fatal signals to threads which are waiting for the write lock.

Link: https://lkml.kernel.org/r/20251110203204.1454057-1-willy@infradead.org
Link: https://lkml.kernel.org/r/20251110203204.1454057-2-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Chris Li <chriscli@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
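As an illustration of the conversion pattern the series describes, a
write-side call-site might look like the sketch below.  This is not taken
from the patch; the function name and surrounding logic are hypothetical,
and the only interface assumed is the vma_start_write_killable() added by
this commit.

/*
 * Hypothetical call-site sketch (not part of this patch): a VMA
 * modification path that holds mmap_lock for write and wants to remain
 * killable while waiting for long-lived readers to drop the VMA lock.
 */
static int example_modify_vma(struct vm_area_struct *vma)
{
	int err;

	/* Caller already holds mmap_write_lock(vma->vm_mm). */
	err = vma_start_write_killable(vma);
	if (err)
		return err;	/* -EINTR: fatal signal arrived while sleeping */

	/* ... modify the VMA; the write lock drops with mmap_lock ... */
	return 0;
}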
Diffstat (limited to 'include')
-rw-r--r--  include/linux/mmap_lock.h  30
1 file changed, 28 insertions(+), 2 deletions(-)
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index e05da70dc0cb..d53f72dba7fe 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -195,7 +195,8 @@ static inline bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned in
 	return (vma->vm_lock_seq == *mm_lock_seq);
 }
 
-void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq);
+int __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq,
+		int state);
 
 /*
  * Begin writing to a VMA.
@@ -209,7 +210,30 @@ static inline void vma_start_write(struct vm_area_struct *vma)
 	if (__is_vma_write_locked(vma, &mm_lock_seq))
 		return;
 
-	__vma_start_write(vma, mm_lock_seq);
+	__vma_start_write(vma, mm_lock_seq, TASK_UNINTERRUPTIBLE);
+}
+
+/**
+ * vma_start_write_killable - Begin writing to a VMA.
+ * @vma: The VMA we are going to modify.
+ *
+ * Exclude concurrent readers under the per-VMA lock until the currently
+ * write-locked mmap_lock is dropped or downgraded.
+ *
+ * Context: May sleep while waiting for readers to drop the vma read lock.
+ * Caller must already hold the mmap_lock for write.
+ *
+ * Return: 0 for a successful acquisition.  -EINTR if a fatal signal was
+ * received.
+ */
+static inline __must_check
+int vma_start_write_killable(struct vm_area_struct *vma)
+{
+	unsigned int mm_lock_seq;
+
+	if (__is_vma_write_locked(vma, &mm_lock_seq))
+		return 0;
+	return __vma_start_write(vma, mm_lock_seq, TASK_KILLABLE);
 }
 
 static inline void vma_assert_write_locked(struct vm_area_struct *vma)
@@ -283,6 +307,8 @@ static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int
 static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
 static inline void vma_end_read(struct vm_area_struct *vma) {}
 static inline void vma_start_write(struct vm_area_struct *vma) {}
+static inline __must_check
+int vma_start_write_killable(struct vm_area_struct *vma) { return 0; }
 static inline void vma_assert_write_locked(struct vm_area_struct *vma)
 	{ mmap_assert_write_locked(vma->vm_mm); }
 static inline void vma_assert_attached(struct vm_area_struct *vma) {}
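Note that the !CONFIG_PER_VMA_LOCK stub above returns 0 unconditionally:
with per-VMA locks disabled, holding mmap_lock for write already excludes
all readers, so there is nothing to wait for.  Because both variants are
__must_check, a caller consumes the return value the same way in either
configuration; a hedged sketch (identifiers here are illustrative, not
from the patch):

	/*
	 * Illustrative only: compiles and behaves correctly whether or
	 * not CONFIG_PER_VMA_LOCK is enabled -- the stub simply reports
	 * immediate success.
	 */
	err = vma_start_write_killable(vma);
	if (err)
		return err;	/* -EINTR, only with CONFIG_PER_VMA_LOCK=y */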