Diffstat (limited to 'fs')
-rw-r--r--  fs/Kconfig                1
-rw-r--r--  fs/aio.c                  2
-rw-r--r--  fs/btrfs/disk-io.c        1
-rw-r--r--  fs/btrfs/inode.c          4
-rw-r--r--  fs/coredump.c             4
-rw-r--r--  fs/dax.c                 47
-rw-r--r--  fs/exec.c                 2
-rw-r--r--  fs/fuse/dev.c             2
-rw-r--r--  fs/gfs2/glops.c           2
-rw-r--r--  fs/hugetlbfs/inode.c     49
-rw-r--r--  fs/jffs2/file.c           4
-rw-r--r--  fs/jfs/jfs_metapage.c     8
-rw-r--r--  fs/mpage.c               14
-rw-r--r--  fs/nilfs2/page.c          2
-rw-r--r--  fs/ntfs3/inode.c         15
-rw-r--r--  fs/pidfs.c                7
-rw-r--r--  fs/proc/array.c           2
-rw-r--r--  fs/proc/base.c           12
-rw-r--r--  fs/proc/internal.h       15
-rw-r--r--  fs/proc/page.c            5
-rw-r--r--  fs/proc/task_mmu.c      190
-rw-r--r--  fs/proc/task_nommu.c     14
-rw-r--r--  fs/ubifs/file.c           6
23 files changed, 237 insertions, 171 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 7815379032da..0bfdaecaa877 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -249,7 +249,6 @@ config ARCH_SUPPORTS_HUGETLBFS
menuconfig HUGETLBFS
bool "HugeTLB file system support"
depends on ARCH_SUPPORTS_HUGETLBFS
- depends on (SYSFS || SYSCTL)
select MEMFD_CREATE
select PADATA if SMP
help
diff --git a/fs/aio.c b/fs/aio.c
index 6002617f078c..5bc133386407 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -445,7 +445,7 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
folio_get(dst);
rc = folio_migrate_mapping(mapping, dst, src, 1);
- if (rc != MIGRATEPAGE_SUCCESS) {
+ if (rc) {
folio_put(dst);
goto out_unlock;
}
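The change here is part of a convention shift that recurs in the btrfs, hugetlbfs and jfs hunks below: folio-migration helpers now return 0 on success rather than MIGRATEPAGE_SUCCESS, so callers can treat the result as an ordinary negative-errno code. A minimal sketch of a migrate_folio callback under the new convention (example_migrate_folio is a hypothetical name):

static int example_migrate_folio(struct address_space *mapping,
				 struct folio *dst, struct folio *src,
				 enum migrate_mode mode)
{
	int rc = filemap_migrate_folio(mapping, dst, src, mode);

	if (rc)			/* negative errno on failure */
		return rc;
	/* ... filesystem-private fixups on dst go here ... */
	return 0;		/* success; formerly MIGRATEPAGE_SUCCESS */
}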
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9247a58894de..ebbf55f8864b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1930,6 +1930,7 @@ static int btrfs_init_btree_inode(struct super_block *sb)
BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root);
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
__insert_inode_hash(inode, hash);
+ set_bit(AS_KERNEL_FILE, &inode->i_mapping->flags);
fs_info->btree_inode = inode;
return 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ced87c9e4682..3b1b3a0553ee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7456,7 +7456,7 @@ static int btrfs_migrate_folio(struct address_space *mapping,
{
int ret = filemap_migrate_folio(mapping, dst, src, mode);
- if (ret != MIGRATEPAGE_SUCCESS)
+ if (ret)
return ret;
if (folio_test_ordered(src)) {
@@ -7464,7 +7464,7 @@ static int btrfs_migrate_folio(struct address_space *mapping,
folio_set_ordered(dst);
}
- return MIGRATEPAGE_SUCCESS;
+ return 0;
}
#else
#define btrfs_migrate_folio NULL
diff --git a/fs/coredump.c b/fs/coredump.c
index 0d9a5d07a75d..b5fc06a092a4 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -1103,8 +1103,10 @@ void vfs_coredump(const kernel_siginfo_t *siginfo)
* We must use the same mm->flags while dumping core to avoid
* inconsistency of bit flags, since this flag is not protected
* by any locks.
+ *
+ * Note that we only care about MMF_DUMP* flags.
*/
- .mm_flags = mm->flags,
+ .mm_flags = __mm_flags_get_dumpable(mm),
.vma_meta = NULL,
.cpu = raw_smp_processor_id(),
};
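This is the pattern used throughout the series for the coredump bits: instead of reading mm->flags directly, callers go through __mm_flags_get_dumpable(), which exposes only the MMF_DUMP* portion of the flags. A hedged sketch of the read side, using the filter macros that also appear in the proc/base.c hunk below:

	unsigned long flags = __mm_flags_get_dumpable(mm);
	unsigned long filter = (flags & MMF_DUMP_FILTER_MASK) >>
				MMF_DUMP_FILTER_SHIFT;	/* coredump filter bits */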
diff --git a/fs/dax.c b/fs/dax.c
index 20ecf652c129..89f071ba7b10 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1375,51 +1375,24 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
const struct iomap_iter *iter, void **entry)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
- unsigned long pmd_addr = vmf->address & PMD_MASK;
- struct vm_area_struct *vma = vmf->vma;
struct inode *inode = mapping->host;
- pgtable_t pgtable = NULL;
struct folio *zero_folio;
- spinlock_t *ptl;
- pmd_t pmd_entry;
- unsigned long pfn;
+ vm_fault_t ret;
zero_folio = mm_get_huge_zero_folio(vmf->vma->vm_mm);
- if (unlikely(!zero_folio))
- goto fallback;
-
- pfn = page_to_pfn(&zero_folio->page);
- *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn,
- DAX_PMD | DAX_ZERO_PAGE);
-
- if (arch_needs_pgtable_deposit()) {
- pgtable = pte_alloc_one(vma->vm_mm);
- if (!pgtable)
- return VM_FAULT_OOM;
- }
-
- ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
- if (!pmd_none(*(vmf->pmd))) {
- spin_unlock(ptl);
- goto fallback;
+ if (unlikely(!zero_folio)) {
+ trace_dax_pmd_load_hole_fallback(inode, vmf, zero_folio, *entry);
+ return VM_FAULT_FALLBACK;
}
- if (pgtable) {
- pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
- mm_inc_nr_ptes(vma->vm_mm);
- }
- pmd_entry = folio_mk_pmd(zero_folio, vmf->vma->vm_page_prot);
- set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
- spin_unlock(ptl);
- trace_dax_pmd_load_hole(inode, vmf, zero_folio, *entry);
- return VM_FAULT_NOPAGE;
+ *entry = dax_insert_entry(xas, vmf, iter, *entry, folio_pfn(zero_folio),
+ DAX_PMD | DAX_ZERO_PAGE);
-fallback:
- if (pgtable)
- pte_free(vma->vm_mm, pgtable);
- trace_dax_pmd_load_hole_fallback(inode, vmf, zero_folio, *entry);
- return VM_FAULT_FALLBACK;
+ ret = vmf_insert_folio_pmd(vmf, zero_folio, false);
+ if (ret == VM_FAULT_NOPAGE)
+ trace_dax_pmd_load_hole(inode, vmf, zero_folio, *entry);
+ return ret;
}
#else
static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
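The net effect of the dax hunk: the open-coded PMD installation (pagetable deposit, pmd_lock(), set_pmd_at() and the fallback unwinding) is delegated to vmf_insert_folio_pmd(), which performs those steps internally. A condensed sketch of the resulting flow, assuming the helper's in-tree semantics:

	zero_folio = mm_get_huge_zero_folio(vmf->vma->vm_mm);
	if (unlikely(!zero_folio))
		return VM_FAULT_FALLBACK;
	*entry = dax_insert_entry(xas, vmf, iter, *entry,
				  folio_pfn(zero_folio),
				  DAX_PMD | DAX_ZERO_PAGE);
	return vmf_insert_folio_pmd(vmf, zero_folio, /* write */ false);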
diff --git a/fs/exec.c b/fs/exec.c
index 4a89918b761f..6b70c6726d31 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1999,7 +1999,7 @@ void set_dumpable(struct mm_struct *mm, int value)
if (WARN_ON((unsigned)value > SUID_DUMP_ROOT))
return;
- set_mask_bits(&mm->flags, MMF_DUMPABLE_MASK, value);
+ __mm_flags_set_mask_dumpable(mm, value);
}
SYSCALL_DEFINE3(execve,
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 66a1ba8c56b5..ad8645c0f9fe 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -935,7 +935,7 @@ static int fuse_check_folio(struct folio *folio)
{
if (folio_mapped(folio) ||
folio->mapping != NULL ||
- (folio->flags & PAGE_FLAGS_CHECK_AT_PREP &
+ (folio->flags.f & PAGE_FLAGS_CHECK_AT_PREP &
~(1 << PG_locked |
1 << PG_referenced |
1 << PG_lru |
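The .f accessor seen here recurs in the gfs2, jffs2, nilfs2, proc and ubifs hunks: folio->flags is no longer a bare unsigned long but a wrapper struct whose raw bits live in the .f member. A minimal sketch of the adjusted access pattern:

	unsigned long raw = folio->flags.f;	/* was: folio->flags */

	if (raw & (1UL << PG_locked))
		pr_debug("folio locked, flags %#lx\n", raw);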
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index fe0faad4892f..0c0a80b3baca 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -40,7 +40,7 @@ static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
"AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page "
"state 0x%lx\n",
bh, (unsigned long long)bh->b_blocknr, bh->b_state,
- bh->b_folio->mapping, bh->b_folio->flags);
+ bh->b_folio->mapping, bh->b_folio->flags.f);
fs_err(sdp, "AIL glock %u:%llu mapping %p\n",
gl->gl_name.ln_type, gl->gl_name.ln_number,
gfs2_glock2aspace(gl));
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index be4be99304bc..9c94ed8c3ab0 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -192,37 +192,25 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
* Someone wants to read @bytes from a HWPOISON hugetlb @folio from @offset.
* Returns the maximum number of bytes one can read without touching the 1st raw
* HWPOISON page.
- *
- * The implementation borrows the iteration logic from copy_page_to_iter*.
*/
static size_t adjust_range_hwpoison(struct folio *folio, size_t offset,
size_t bytes)
{
- struct page *page;
- size_t n = 0;
- size_t res = 0;
-
- /* First page to start the loop. */
- page = folio_page(folio, offset / PAGE_SIZE);
- offset %= PAGE_SIZE;
- while (1) {
- if (is_raw_hwpoison_page_in_hugepage(page))
- break;
+ struct page *page = folio_page(folio, offset / PAGE_SIZE);
+ size_t safe_bytes;
- /* Safe to read n bytes without touching HWPOISON subpage. */
- n = min(bytes, (size_t)PAGE_SIZE - offset);
- res += n;
- bytes -= n;
- if (!bytes || !n)
+ if (is_raw_hwpoison_page_in_hugepage(page))
+ return 0;
+ /* Safe to read the remaining bytes in this page. */
+ safe_bytes = PAGE_SIZE - (offset % PAGE_SIZE);
+ page++;
+
+ /* Check each remaining page as long as we are not done yet. */
+ for (; safe_bytes < bytes; safe_bytes += PAGE_SIZE, page++)
+ if (is_raw_hwpoison_page_in_hugepage(page))
break;
- offset += n;
- if (offset == PAGE_SIZE) {
- page = nth_page(page, 1);
- offset = 0;
- }
- }
- return res;
+ return min(safe_bytes, bytes);
}
/*
@@ -490,6 +478,14 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
if (!hugetlb_vma_trylock_write(vma))
continue;
+ /*
+ * Skip VMAs without shareable locks. Per the design in commit
+ * 40549ba8f8e0, these will be handled by remove_inode_hugepages()
+ * called after this function with proper locking.
+ */
+ if (!__vma_shareable_lock(vma))
+ goto skip;
+
v_start = vma_offset_start(vma, start);
v_end = vma_offset_end(vma, end);
@@ -500,6 +496,7 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
* vmas. Therefore, lock is not held when calling
* unmap_hugepage_range for private vmas.
*/
+skip:
hugetlb_vma_unlock_write(vma);
}
}
@@ -1054,7 +1051,7 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping,
int rc;
rc = migrate_huge_page_move_mapping(mapping, dst, src);
- if (rc != MIGRATEPAGE_SUCCESS)
+ if (rc)
return rc;
if (hugetlb_folio_subpool(src)) {
@@ -1065,7 +1062,7 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping,
folio_migrate_flags(dst, src);
- return MIGRATEPAGE_SUCCESS;
+ return 0;
}
#else
#define hugetlbfs_migrate_folio NULL
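A worked example of the rewritten adjust_range_hwpoison(), with 4 KiB pages and hypothetical values:

	/*
	 * offset = 0x2100, bytes = 0x3000, and the first raw HWPOISON
	 * subpage is two pages past the starting one:
	 *
	 *   page       = folio_page(folio, 0x2100 / 0x1000) = subpage 2
	 *   safe_bytes = 0x1000 - 0x100 = 0xf00   (rest of subpage 2)
	 *   loop: subpage 3 clean  -> safe_bytes = 0x1f00
	 *         subpage 4 poison -> break
	 *
	 *   return min(0x1f00, 0x3000) = 0x1f00
	 */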
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index dd3dff95cb24..b697f3c259ef 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -230,7 +230,7 @@ static int jffs2_write_begin(const struct kiocb *iocb,
goto release_sem;
}
}
- jffs2_dbg(1, "end write_begin(). folio->flags %lx\n", folio->flags);
+ jffs2_dbg(1, "end write_begin(). folio->flags %lx\n", folio->flags.f);
release_sem:
mutex_unlock(&c->alloc_sem);
@@ -259,7 +259,7 @@ static int jffs2_write_end(const struct kiocb *iocb,
jffs2_dbg(1, "%s(): ino #%lu, page at 0x%llx, range %d-%d, flags %lx\n",
__func__, inode->i_ino, folio_pos(folio),
- start, end, folio->flags);
+ start, end, folio->flags.f);
/* We need to avoid deadlock with page_cache_read() in
jffs2_garbage_collect_pass(). So the folio must be
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index b98cf3bb6c1f..871cf4fb3636 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -169,7 +169,7 @@ static int __metapage_migrate_folio(struct address_space *mapping,
}
rc = filemap_migrate_folio(mapping, dst, src, mode);
- if (rc != MIGRATEPAGE_SUCCESS)
+ if (rc)
return rc;
for (i = 0; i < MPS_PER_PAGE; i++) {
@@ -199,7 +199,7 @@ static int __metapage_migrate_folio(struct address_space *mapping,
}
}
- return MIGRATEPAGE_SUCCESS;
+ return 0;
}
#endif /* CONFIG_MIGRATION */
@@ -242,7 +242,7 @@ static int __metapage_migrate_folio(struct address_space *mapping,
return -EAGAIN;
rc = filemap_migrate_folio(mapping, dst, src, mode);
- if (rc != MIGRATEPAGE_SUCCESS)
+ if (rc)
return rc;
if (unlikely(insert_metapage(dst, mp)))
@@ -253,7 +253,7 @@ static int __metapage_migrate_folio(struct address_space *mapping,
mp->folio = dst;
remove_metapage(src, mp);
- return MIGRATEPAGE_SUCCESS;
+ return 0;
}
#endif /* CONFIG_MIGRATION */
diff --git a/fs/mpage.c b/fs/mpage.c
index c5fd821fd30e..7dae5afc2b9e 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -148,7 +148,7 @@ struct mpage_readpage_args {
* represent the validity of its disk mapping and to decide when to do the next
* get_block() call.
*/
-static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
+static void do_mpage_readpage(struct mpage_readpage_args *args)
{
struct folio *folio = args->folio;
struct inode *inode = folio->mapping->host;
@@ -305,7 +305,7 @@ alloc_new:
else
args->last_block_in_bio = first_block + blocks_per_folio - 1;
out:
- return args->bio;
+ return;
confused:
if (args->bio)
@@ -368,7 +368,13 @@ void mpage_readahead(struct readahead_control *rac, get_block_t get_block)
prefetchw(&folio->flags);
args.folio = folio;
args.nr_pages = readahead_count(rac);
- args.bio = do_mpage_readpage(&args);
+ do_mpage_readpage(&args);
+ /*
+ * If readahead failed synchronously, it may be caused by a
+ * removed device or a filesystem metadata error.
+ */
+ if (!folio_test_locked(folio) && !folio_test_uptodate(folio))
+ break;
}
if (args.bio)
mpage_bio_submit_read(args.bio);
@@ -386,7 +392,7 @@ int mpage_read_folio(struct folio *folio, get_block_t get_block)
.get_block = get_block,
};
- args.bio = do_mpage_readpage(&args);
+ do_mpage_readpage(&args);
if (args.bio)
mpage_bio_submit_read(args.bio);
return 0;
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 806b056d2260..56c4da417b6a 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -167,7 +167,7 @@ void nilfs_folio_bug(struct folio *folio)
printk(KERN_CRIT "NILFS_FOLIO_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
"mapping=%p ino=%lu\n",
folio, folio_ref_count(folio),
- (unsigned long long)folio->index, folio->flags, m, ino);
+ (unsigned long long)folio->index, folio->flags.f, m, ino);
head = folio_buffers(folio);
if (head) {
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 37cbbee7fa58..48b4f73a93ee 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -871,9 +871,9 @@ out:
}
static int ntfs_resident_writepage(struct folio *folio,
- struct writeback_control *wbc, void *data)
+ struct writeback_control *wbc)
{
- struct address_space *mapping = data;
+ struct address_space *mapping = folio->mapping;
struct inode *inode = mapping->host;
struct ntfs_inode *ni = ntfs_i(inode);
int ret;
@@ -907,9 +907,14 @@ static int ntfs_writepages(struct address_space *mapping,
if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
return -EIO;
- if (is_resident(ntfs_i(inode)))
- return write_cache_pages(mapping, wbc, ntfs_resident_writepage,
- mapping);
+ if (is_resident(ntfs_i(inode))) {
+ struct folio *folio = NULL;
+ int error;
+
+ while ((folio = writeback_iter(mapping, wbc, folio, &error)))
+ error = ntfs_resident_writepage(folio, wbc);
+ return error;
+ }
return mpage_writepages(mapping, wbc, ntfs_get_block);
}
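The write_cache_pages() call with a callback is replaced above by an open-coded writeback_iter() loop: the iterator hands back dirty folios one at a time and carries the per-folio error in &error. A hedged sketch of the general pattern (write_one_folio is a hypothetical per-folio writeout):

	struct folio *folio = NULL;
	int error = 0;

	while ((folio = writeback_iter(mapping, wbc, folio, &error)))
		error = write_one_folio(folio, wbc);
	return error;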
diff --git a/fs/pidfs.c b/fs/pidfs.c
index c40c29c702e5..44a95cd27377 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -357,8 +357,11 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
if ((kinfo.mask & PIDFD_INFO_COREDUMP) && !(kinfo.coredump_mask)) {
task_lock(task);
- if (task->mm)
- kinfo.coredump_mask = pidfs_coredump_mask(task->mm->flags);
+ if (task->mm) {
+ unsigned long flags = __mm_flags_get_dumpable(task->mm);
+
+ kinfo.coredump_mask = pidfs_coredump_mask(flags);
+ }
task_unlock(task);
}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 69269745d73b..2ae63189091e 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -420,7 +420,7 @@ static inline void task_thp_status(struct seq_file *m, struct mm_struct *mm)
bool thp_enabled = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE);
if (thp_enabled)
- thp_enabled = !test_bit(MMF_DISABLE_THP, &mm->flags);
+ thp_enabled = !mm_flags_test(MMF_DISABLE_THP_COMPLETELY, mm);
seq_printf(m, "THP_enabled:\t%d\n", thp_enabled);
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 62d35631ba8c..b997ceef9135 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1163,7 +1163,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
struct task_struct *p = find_lock_task_mm(task);
if (p) {
- if (test_bit(MMF_MULTIPROCESS, &p->mm->flags)) {
+ if (mm_flags_test(MMF_MULTIPROCESS, p->mm)) {
mm = p->mm;
mmgrab(mm);
}
@@ -2962,8 +2962,10 @@ static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
ret = 0;
mm = get_task_mm(task);
if (mm) {
+ unsigned long flags = __mm_flags_get_dumpable(mm);
+
len = snprintf(buffer, sizeof(buffer), "%08lx\n",
- ((mm->flags & MMF_DUMP_FILTER_MASK) >>
+ ((flags & MMF_DUMP_FILTER_MASK) >>
MMF_DUMP_FILTER_SHIFT));
mmput(mm);
ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
@@ -3002,9 +3004,9 @@ static ssize_t proc_coredump_filter_write(struct file *file,
for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
if (val & mask)
- set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
+ mm_flags_set(i + MMF_DUMP_FILTER_SHIFT, mm);
else
- clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
+ mm_flags_clear(i + MMF_DUMP_FILTER_SHIFT, mm);
}
mmput(mm);
@@ -3274,7 +3276,7 @@ static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns,
seq_printf(m, "ksm_merging_pages %lu\n", mm->ksm_merging_pages);
seq_printf(m, "ksm_process_profit %ld\n", ksm_process_profit(mm));
seq_printf(m, "ksm_merge_any: %s\n",
- test_bit(MMF_VM_MERGE_ANY, &mm->flags) ? "yes" : "no");
+ mm_flags_test(MMF_VM_MERGE_ANY, mm) ? "yes" : "no");
ret = mmap_read_lock_killable(mm);
if (ret) {
mmput(mm);
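The mm_flags_test()/mm_flags_set()/mm_flags_clear() helpers used above replace direct bitops on &mm->flags; the translation is mechanical, shown here with flags from this patch:

	bool multi = mm_flags_test(MMF_MULTIPROCESS, mm);	/* was test_bit(..., &mm->flags) */

	mm_flags_set(i + MMF_DUMP_FILTER_SHIFT, mm);		/* was set_bit() */
	mm_flags_clear(i + MMF_DUMP_FILTER_SHIFT, mm);		/* was clear_bit() */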
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index e737401d7383..d1598576506c 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -378,16 +378,21 @@ extern void proc_self_init(void);
* task_[no]mmu.c
*/
struct mem_size_stats;
-struct proc_maps_private {
- struct inode *inode;
- struct task_struct *task;
+
+struct proc_maps_locking_ctx {
struct mm_struct *mm;
- struct vma_iterator iter;
- loff_t last_pos;
#ifdef CONFIG_PER_VMA_LOCK
bool mmap_locked;
struct vm_area_struct *locked_vma;
#endif
+};
+
+struct proc_maps_private {
+ struct inode *inode;
+ struct task_struct *task;
+ struct vma_iterator iter;
+ loff_t last_pos;
+ struct proc_maps_locking_ctx lock_ctx;
#ifdef CONFIG_NUMA
struct mempolicy *task_mempolicy;
#endif
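With this split, everything the VMA-locking helpers and the PROCMAP_QUERY path need travels in the embedded proc_maps_locking_ctx, so they no longer have to see all of proc_maps_private. Call sites change accordingly in the task_mmu.c and task_nommu.c hunks below, e.g.:

	struct mm_struct *mm = priv->lock_ctx.mm;	/* was: priv->mm */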
diff --git a/fs/proc/page.c b/fs/proc/page.c
index ba3568e97fd1..fc64f23e05e5 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -163,7 +163,7 @@ u64 stable_page_flags(const struct page *page)
snapshot_page(&ps, page);
folio = &ps.folio_snapshot;
- k = folio->flags;
+ k = folio->flags.f;
mapping = (unsigned long)folio->mapping;
is_anon = mapping & FOLIO_MAPPING_ANON;
@@ -238,7 +238,7 @@ u64 stable_page_flags(const struct page *page)
if (u & (1 << KPF_HUGE))
u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison);
else
- u |= kpf_copy_bit(ps.page_snapshot.flags, KPF_HWPOISON, PG_hwpoison);
+ u |= kpf_copy_bit(ps.page_snapshot.flags.f, KPF_HWPOISON, PG_hwpoison);
#endif
u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved);
@@ -256,6 +256,7 @@ u64 stable_page_flags(const struct page *page)
return u;
}
+EXPORT_SYMBOL_GPL(stable_page_flags);
/* /proc/kpageflags - an array exposing page flags
*
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index b26ae556b446..fc35a0543f01 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -132,18 +132,24 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
#ifdef CONFIG_PER_VMA_LOCK
-static void unlock_vma(struct proc_maps_private *priv)
+static void reset_lock_ctx(struct proc_maps_locking_ctx *lock_ctx)
{
- if (priv->locked_vma) {
- vma_end_read(priv->locked_vma);
- priv->locked_vma = NULL;
+ lock_ctx->locked_vma = NULL;
+ lock_ctx->mmap_locked = false;
+}
+
+static void unlock_ctx_vma(struct proc_maps_locking_ctx *lock_ctx)
+{
+ if (lock_ctx->locked_vma) {
+ vma_end_read(lock_ctx->locked_vma);
+ lock_ctx->locked_vma = NULL;
}
}
static const struct seq_operations proc_pid_maps_op;
static inline bool lock_vma_range(struct seq_file *m,
- struct proc_maps_private *priv)
+ struct proc_maps_locking_ctx *lock_ctx)
{
/*
* smaps and numa_maps perform page table walk, therefore require
@@ -151,25 +157,24 @@ static inline bool lock_vma_range(struct seq_file *m,
* walking the vma tree under rcu read protection.
*/
if (m->op != &proc_pid_maps_op) {
- if (mmap_read_lock_killable(priv->mm))
+ if (mmap_read_lock_killable(lock_ctx->mm))
return false;
- priv->mmap_locked = true;
+ lock_ctx->mmap_locked = true;
} else {
rcu_read_lock();
- priv->locked_vma = NULL;
- priv->mmap_locked = false;
+ reset_lock_ctx(lock_ctx);
}
return true;
}
-static inline void unlock_vma_range(struct proc_maps_private *priv)
+static inline void unlock_vma_range(struct proc_maps_locking_ctx *lock_ctx)
{
- if (priv->mmap_locked) {
- mmap_read_unlock(priv->mm);
+ if (lock_ctx->mmap_locked) {
+ mmap_read_unlock(lock_ctx->mm);
} else {
- unlock_vma(priv);
+ unlock_ctx_vma(lock_ctx);
rcu_read_unlock();
}
}
@@ -177,15 +182,16 @@ static inline void unlock_vma_range(struct proc_maps_private *priv)
static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv,
loff_t last_pos)
{
+ struct proc_maps_locking_ctx *lock_ctx = &priv->lock_ctx;
struct vm_area_struct *vma;
- if (priv->mmap_locked)
+ if (lock_ctx->mmap_locked)
return vma_next(&priv->iter);
- unlock_vma(priv);
- vma = lock_next_vma(priv->mm, &priv->iter, last_pos);
+ unlock_ctx_vma(lock_ctx);
+ vma = lock_next_vma(lock_ctx->mm, &priv->iter, last_pos);
if (!IS_ERR_OR_NULL(vma))
- priv->locked_vma = vma;
+ lock_ctx->locked_vma = vma;
return vma;
}
@@ -193,14 +199,16 @@ static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv,
static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv,
loff_t pos)
{
- if (priv->mmap_locked)
+ struct proc_maps_locking_ctx *lock_ctx = &priv->lock_ctx;
+
+ if (lock_ctx->mmap_locked)
return false;
rcu_read_unlock();
- mmap_read_lock(priv->mm);
+ mmap_read_lock(lock_ctx->mm);
/* Reinitialize the iterator after taking mmap_lock */
vma_iter_set(&priv->iter, pos);
- priv->mmap_locked = true;
+ lock_ctx->mmap_locked = true;
return true;
}
@@ -208,14 +216,14 @@ static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv,
#else /* CONFIG_PER_VMA_LOCK */
static inline bool lock_vma_range(struct seq_file *m,
- struct proc_maps_private *priv)
+ struct proc_maps_locking_ctx *lock_ctx)
{
- return mmap_read_lock_killable(priv->mm) == 0;
+ return mmap_read_lock_killable(lock_ctx->mm) == 0;
}
-static inline void unlock_vma_range(struct proc_maps_private *priv)
+static inline void unlock_vma_range(struct proc_maps_locking_ctx *lock_ctx)
{
- mmap_read_unlock(priv->mm);
+ mmap_read_unlock(lock_ctx->mm);
}
static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv,
@@ -258,7 +266,7 @@ retry:
*ppos = vma->vm_end;
} else {
*ppos = SENTINEL_VMA_GATE;
- vma = get_gate_vma(priv->mm);
+ vma = get_gate_vma(priv->lock_ctx.mm);
}
return vma;
@@ -267,6 +275,7 @@ retry:
static void *m_start(struct seq_file *m, loff_t *ppos)
{
struct proc_maps_private *priv = m->private;
+ struct proc_maps_locking_ctx *lock_ctx;
loff_t last_addr = *ppos;
struct mm_struct *mm;
@@ -278,14 +287,15 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
if (!priv->task)
return ERR_PTR(-ESRCH);
- mm = priv->mm;
+ lock_ctx = &priv->lock_ctx;
+ mm = lock_ctx->mm;
if (!mm || !mmget_not_zero(mm)) {
put_task_struct(priv->task);
priv->task = NULL;
return NULL;
}
- if (!lock_vma_range(m, priv)) {
+ if (!lock_vma_range(m, lock_ctx)) {
mmput(mm);
put_task_struct(priv->task);
priv->task = NULL;
@@ -318,13 +328,13 @@ static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
static void m_stop(struct seq_file *m, void *v)
{
struct proc_maps_private *priv = m->private;
- struct mm_struct *mm = priv->mm;
+ struct mm_struct *mm = priv->lock_ctx.mm;
if (!priv->task)
return;
release_task_mempolicy(priv);
- unlock_vma_range(priv);
+ unlock_vma_range(&priv->lock_ctx);
mmput(mm);
put_task_struct(priv->task);
priv->task = NULL;
@@ -339,9 +349,9 @@ static int proc_maps_open(struct inode *inode, struct file *file,
return -ENOMEM;
priv->inode = inode;
- priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
- if (IS_ERR(priv->mm)) {
- int err = PTR_ERR(priv->mm);
+ priv->lock_ctx.mm = proc_mem_open(inode, PTRACE_MODE_READ);
+ if (IS_ERR(priv->lock_ctx.mm)) {
+ int err = PTR_ERR(priv->lock_ctx.mm);
seq_release_private(inode, file);
return err;
@@ -355,8 +365,8 @@ static int proc_map_release(struct inode *inode, struct file *file)
struct seq_file *seq = file->private_data;
struct proc_maps_private *priv = seq->private;
- if (priv->mm)
- mmdrop(priv->mm);
+ if (priv->lock_ctx.mm)
+ mmdrop(priv->lock_ctx.mm);
return seq_release_private(inode, file);
}
@@ -517,28 +527,90 @@ static int pid_maps_open(struct inode *inode, struct file *file)
PROCMAP_QUERY_VMA_FLAGS \
)
-static int query_vma_setup(struct mm_struct *mm)
+#ifdef CONFIG_PER_VMA_LOCK
+
+static int query_vma_setup(struct proc_maps_locking_ctx *lock_ctx)
{
- return mmap_read_lock_killable(mm);
+ reset_lock_ctx(lock_ctx);
+
+ return 0;
}
-static void query_vma_teardown(struct mm_struct *mm, struct vm_area_struct *vma)
+static void query_vma_teardown(struct proc_maps_locking_ctx *lock_ctx)
{
- mmap_read_unlock(mm);
+ if (lock_ctx->mmap_locked) {
+ mmap_read_unlock(lock_ctx->mm);
+ lock_ctx->mmap_locked = false;
+ } else {
+ unlock_ctx_vma(lock_ctx);
+ }
+}
+
+static struct vm_area_struct *query_vma_find_by_addr(struct proc_maps_locking_ctx *lock_ctx,
+ unsigned long addr)
+{
+ struct mm_struct *mm = lock_ctx->mm;
+ struct vm_area_struct *vma;
+ struct vma_iterator vmi;
+
+ if (lock_ctx->mmap_locked)
+ return find_vma(mm, addr);
+
+ /* Unlock previously locked VMA and find the next one under RCU */
+ unlock_ctx_vma(lock_ctx);
+ rcu_read_lock();
+ vma_iter_init(&vmi, mm, addr);
+ vma = lock_next_vma(mm, &vmi, addr);
+ rcu_read_unlock();
+
+ if (!vma)
+ return NULL;
+
+ if (!IS_ERR(vma)) {
+ lock_ctx->locked_vma = vma;
+ return vma;
+ }
+
+ if (PTR_ERR(vma) == -EAGAIN) {
+ /* Fallback to mmap_lock on vma->vm_refcnt overflow */
+ mmap_read_lock(mm);
+ vma = find_vma(mm, addr);
+ lock_ctx->mmap_locked = true;
+ }
+
+ return vma;
+}
+
+#else /* CONFIG_PER_VMA_LOCK */
+
+static int query_vma_setup(struct proc_maps_locking_ctx *lock_ctx)
+{
+ return mmap_read_lock_killable(lock_ctx->mm);
+}
+
+static void query_vma_teardown(struct proc_maps_locking_ctx *lock_ctx)
+{
+ mmap_read_unlock(lock_ctx->mm);
}
-static struct vm_area_struct *query_vma_find_by_addr(struct mm_struct *mm, unsigned long addr)
+static struct vm_area_struct *query_vma_find_by_addr(struct proc_maps_locking_ctx *lock_ctx,
+ unsigned long addr)
{
- return find_vma(mm, addr);
+ return find_vma(lock_ctx->mm, addr);
}
-static struct vm_area_struct *query_matching_vma(struct mm_struct *mm,
+#endif /* CONFIG_PER_VMA_LOCK */
+
+static struct vm_area_struct *query_matching_vma(struct proc_maps_locking_ctx *lock_ctx,
unsigned long addr, u32 flags)
{
struct vm_area_struct *vma;
next_vma:
- vma = query_vma_find_by_addr(mm, addr);
+ vma = query_vma_find_by_addr(lock_ctx, addr);
+ if (IS_ERR(vma))
+ return vma;
+
if (!vma)
goto no_vma;
@@ -579,11 +651,11 @@ no_vma:
return ERR_PTR(-ENOENT);
}
-static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
+static int do_procmap_query(struct mm_struct *mm, void __user *uarg)
{
+ struct proc_maps_locking_ctx lock_ctx = { .mm = mm };
struct procmap_query karg;
struct vm_area_struct *vma;
- struct mm_struct *mm;
const char *name = NULL;
char build_id_buf[BUILD_ID_SIZE_MAX], *name_buf = NULL;
__u64 usize;
@@ -610,17 +682,16 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
if (!!karg.build_id_size != !!karg.build_id_addr)
return -EINVAL;
- mm = priv->mm;
if (!mm || !mmget_not_zero(mm))
return -ESRCH;
- err = query_vma_setup(mm);
+ err = query_vma_setup(&lock_ctx);
if (err) {
mmput(mm);
return err;
}
- vma = query_matching_vma(mm, karg.query_addr, karg.query_flags);
+ vma = query_matching_vma(&lock_ctx, karg.query_addr, karg.query_flags);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
vma = NULL;
@@ -705,7 +776,7 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
}
/* unlock vma or mmap_lock, and put mm_struct before copying data to user */
- query_vma_teardown(mm, vma);
+ query_vma_teardown(&lock_ctx);
mmput(mm);
if (karg.vma_name_size && copy_to_user(u64_to_user_ptr(karg.vma_name_addr),
@@ -725,7 +796,7 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
return 0;
out:
- query_vma_teardown(mm, vma);
+ query_vma_teardown(&lock_ctx);
mmput(mm);
kfree(name_buf);
return err;
@@ -738,7 +809,8 @@ static long procfs_procmap_ioctl(struct file *file, unsigned int cmd, unsigned l
switch (cmd) {
case PROCMAP_QUERY:
- return do_procmap_query(priv, (void __user *)arg);
+ /* priv->lock_ctx.mm is set during the file open operation */
+ return do_procmap_query(priv->lock_ctx.mm, (void __user *)arg);
default:
return -ENOIOCTLCMD;
}
@@ -1297,8 +1369,8 @@ static int show_smap(struct seq_file *m, void *v)
__show_smap(m, &mss, false);
seq_printf(m, "THPeligible: %8u\n",
- !!thp_vma_allowable_orders(vma, vma->vm_flags,
- TVA_SMAPS | TVA_ENFORCE_SYSFS, THP_ORDERS_ALL));
+ !!thp_vma_allowable_orders(vma, vma->vm_flags, TVA_SMAPS,
+ THP_ORDERS_ALL));
if (arch_pkeys_enabled())
seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
@@ -1311,7 +1383,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
{
struct proc_maps_private *priv = m->private;
struct mem_size_stats mss = {};
- struct mm_struct *mm = priv->mm;
+ struct mm_struct *mm = priv->lock_ctx.mm;
struct vm_area_struct *vma;
unsigned long vma_start = 0, last_vma_end = 0;
int ret = 0;
@@ -1456,9 +1528,9 @@ static int smaps_rollup_open(struct inode *inode, struct file *file)
goto out_free;
priv->inode = inode;
- priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
- if (IS_ERR_OR_NULL(priv->mm)) {
- ret = priv->mm ? PTR_ERR(priv->mm) : -ESRCH;
+ priv->lock_ctx.mm = proc_mem_open(inode, PTRACE_MODE_READ);
+ if (IS_ERR_OR_NULL(priv->lock_ctx.mm)) {
+ ret = priv->lock_ctx.mm ? PTR_ERR(priv->lock_ctx.mm) : -ESRCH;
single_release(inode, file);
goto out_free;
@@ -1476,8 +1548,8 @@ static int smaps_rollup_release(struct inode *inode, struct file *file)
struct seq_file *seq = file->private_data;
struct proc_maps_private *priv = seq->private;
- if (priv->mm)
- mmdrop(priv->mm);
+ if (priv->lock_ctx.mm)
+ mmdrop(priv->lock_ctx.mm);
kfree(priv);
return single_release(inode, file);
@@ -1520,7 +1592,7 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
return false;
if (!is_cow_mapping(vma->vm_flags))
return false;
- if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)))
+ if (likely(!mm_flags_test(MMF_HAS_PINNED, vma->vm_mm)))
return false;
folio = vm_normal_folio(vma, addr, pte);
if (!folio)
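Taken together, the PROCMAP_QUERY changes make the ioctl lockless-first on CONFIG_PER_VMA_LOCK kernels: a per-VMA read lock is taken under RCU, and mmap_lock is used only as a fallback when the VMA's vm_refcnt overflows. A condensed sketch of that fallback, mirroring query_vma_find_by_addr() above:

	vma = lock_next_vma(mm, &vmi, addr);	/* per-VMA lock, under RCU */
	if (IS_ERR(vma) && PTR_ERR(vma) == -EAGAIN) {
		mmap_read_lock(mm);		/* vm_refcnt overflow fallback */
		vma = find_vma(mm, addr);
		lock_ctx->mmap_locked = true;
	}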
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 59bfd61d653a..d362919f4f68 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -204,7 +204,7 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
if (!priv->task)
return ERR_PTR(-ESRCH);
- mm = priv->mm;
+ mm = priv->lock_ctx.mm;
if (!mm || !mmget_not_zero(mm)) {
put_task_struct(priv->task);
priv->task = NULL;
@@ -226,7 +226,7 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
static void m_stop(struct seq_file *m, void *v)
{
struct proc_maps_private *priv = m->private;
- struct mm_struct *mm = priv->mm;
+ struct mm_struct *mm = priv->lock_ctx.mm;
if (!priv->task)
return;
@@ -259,9 +259,9 @@ static int maps_open(struct inode *inode, struct file *file,
return -ENOMEM;
priv->inode = inode;
- priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
- if (IS_ERR_OR_NULL(priv->mm)) {
- int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH;
+ priv->lock_ctx.mm = proc_mem_open(inode, PTRACE_MODE_READ);
+ if (IS_ERR_OR_NULL(priv->lock_ctx.mm)) {
+ int err = priv->lock_ctx.mm ? PTR_ERR(priv->lock_ctx.mm) : -ESRCH;
seq_release_private(inode, file);
return err;
@@ -276,8 +276,8 @@ static int map_release(struct inode *inode, struct file *file)
struct seq_file *seq = file->private_data;
struct proc_maps_private *priv = seq->private;
- if (priv->mm)
- mmdrop(priv->mm);
+ if (priv->lock_ctx.mm)
+ mmdrop(priv->lock_ctx.mm);
return seq_release_private(inode, file);
}
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index e75a6cec67be..ca41ce8208c4 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -107,7 +107,7 @@ static int do_readpage(struct folio *folio)
size_t offset = 0;
dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
- inode->i_ino, folio->index, i_size, folio->flags);
+ inode->i_ino, folio->index, i_size, folio->flags.f);
ubifs_assert(c, !folio_test_checked(folio));
ubifs_assert(c, !folio->private);
@@ -600,7 +600,7 @@ static int populate_page(struct ubifs_info *c, struct folio *folio,
pgoff_t end_index;
dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
- inode->i_ino, folio->index, i_size, folio->flags);
+ inode->i_ino, folio->index, i_size, folio->flags.f);
end_index = (i_size - 1) >> PAGE_SHIFT;
if (!i_size || folio->index > end_index) {
@@ -988,7 +988,7 @@ static int ubifs_writepage(struct folio *folio, struct writeback_control *wbc)
int err, len = folio_size(folio);
dbg_gen("ino %lu, pg %lu, pg flags %#lx",
- inode->i_ino, folio->index, folio->flags);
+ inode->i_ino, folio->index, folio->flags.f);
ubifs_assert(c, folio->private != NULL);
/* Is the folio fully outside @i_size? (truncate in progress) */