Diffstat (limited to 'fs/dax.c')
-rw-r--r--  fs/dax.c  186
1 file changed, 114 insertions, 72 deletions
diff --git a/fs/dax.c b/fs/dax.c
index 6674540363e8..cf96f3dd4e5f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -71,6 +71,11 @@ static unsigned long dax_to_pfn(void *entry)
return xa_to_value(entry) >> DAX_SHIFT;
}
+static struct folio *dax_to_folio(void *entry)
+{
+ return page_folio(pfn_to_page(dax_to_pfn(entry)));
+}
+
static void *dax_make_entry(pfn_t pfn, unsigned long flags)
{
return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
@@ -338,19 +343,6 @@ static unsigned long dax_entry_size(void *entry)
return PAGE_SIZE;
}
-static unsigned long dax_end_pfn(void *entry)
-{
- return dax_to_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
-}
-
-/*
- * Iterate through all mapped pfns represented by an entry, i.e. skip
- * 'empty' and 'zero' entries.
- */
-#define for_each_mapped_pfn(entry, pfn) \
- for (pfn = dax_to_pfn(entry); \
- pfn < dax_end_pfn(entry); pfn++)
-
/*
* A DAX folio is considered shared if it has no mapping set and ->share (which
* shares the ->index field) is non-zero. Note this may return false even if the
@@ -359,7 +351,7 @@ static unsigned long dax_end_pfn(void *entry)
*/
static inline bool dax_folio_is_shared(struct folio *folio)
{
- return !folio->mapping && folio->page.share;
+ return !folio->mapping && folio->share;
}
/*
@@ -384,75 +376,117 @@ static void dax_folio_make_shared(struct folio *folio)
* folio has previously been mapped into one address space so set the
* share count.
*/
- folio->page.share = 1;
+ folio->share = 1;
}
-static inline unsigned long dax_folio_share_put(struct folio *folio)
+static inline unsigned long dax_folio_put(struct folio *folio)
{
- return --folio->page.share;
+ unsigned long ref;
+ int order, i;
+
+ if (!dax_folio_is_shared(folio))
+ ref = 0;
+ else
+ ref = --folio->share;
+
+ if (ref)
+ return ref;
+
+ folio->mapping = NULL;
+ order = folio_order(folio);
+ if (!order)
+ return 0;
+
+ for (i = 0; i < (1UL << order); i++) {
+ struct dev_pagemap *pgmap = page_pgmap(&folio->page);
+ struct page *page = folio_page(folio, i);
+ struct folio *new_folio = (struct folio *)page;
+
+ ClearPageHead(page);
+ clear_compound_head(page);
+
+ new_folio->mapping = NULL;
+ /*
+ * Reset pgmap which was over-written by
+ * prep_compound_page().
+ */
+ new_folio->pgmap = pgmap;
+ new_folio->share = 0;
+ WARN_ON_ONCE(folio_ref_count(new_folio));
+ }
+
+ return ref;
+}
+
+static void dax_folio_init(void *entry)
+{
+ struct folio *folio = dax_to_folio(entry);
+ int order = dax_entry_order(entry);
+
+ /*
+ * Folio should have been split back to order-0 pages in
+ * dax_folio_put() when they were removed from their
+ * final mapping.
+ */
+ WARN_ON_ONCE(folio_order(folio));
+
+ if (order > 0) {
+ prep_compound_page(&folio->page, order);
+ if (order > 1)
+ INIT_LIST_HEAD(&folio->_deferred_list);
+ WARN_ON_ONCE(folio_ref_count(folio));
+ }
}
static void dax_associate_entry(void *entry, struct address_space *mapping,
- struct vm_area_struct *vma, unsigned long address, bool shared)
+ struct vm_area_struct *vma,
+ unsigned long address, bool shared)
{
- unsigned long size = dax_entry_size(entry), pfn, index;
- int i = 0;
+ unsigned long size = dax_entry_size(entry), index;
+ struct folio *folio = dax_to_folio(entry);
if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
return;
index = linear_page_index(vma, address & ~(size - 1));
- for_each_mapped_pfn(entry, pfn) {
- struct folio *folio = pfn_folio(pfn);
-
- if (shared && (folio->mapping || folio->page.share)) {
- if (folio->mapping)
- dax_folio_make_shared(folio);
+ if (shared && (folio->mapping || dax_folio_is_shared(folio))) {
+ if (folio->mapping)
+ dax_folio_make_shared(folio);
- WARN_ON_ONCE(!folio->page.share);
- folio->page.share++;
- } else {
- WARN_ON_ONCE(folio->mapping);
- folio->mapping = mapping;
- folio->index = index + i++;
- }
+ WARN_ON_ONCE(!folio->share);
+ WARN_ON_ONCE(dax_entry_order(entry) != folio_order(folio));
+ folio->share++;
+ } else {
+ WARN_ON_ONCE(folio->mapping);
+ dax_folio_init(entry);
+ folio = dax_to_folio(entry);
+ folio->mapping = mapping;
+ folio->index = index;
}
}
static void dax_disassociate_entry(void *entry, struct address_space *mapping,
- bool trunc)
+ bool trunc)
{
- unsigned long pfn;
+ struct folio *folio = dax_to_folio(entry);
if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
return;
- for_each_mapped_pfn(entry, pfn) {
- struct folio *folio = pfn_folio(pfn);
-
- WARN_ON_ONCE(trunc && folio_ref_count(folio) > 1);
- if (dax_folio_is_shared(folio)) {
- /* keep the shared flag if this page is still shared */
- if (dax_folio_share_put(folio) > 0)
- continue;
- } else
- WARN_ON_ONCE(folio->mapping && folio->mapping != mapping);
- folio->mapping = NULL;
- folio->index = 0;
- }
+ dax_folio_put(folio);
}
static struct page *dax_busy_page(void *entry)
{
- unsigned long pfn;
+ struct folio *folio = dax_to_folio(entry);
- for_each_mapped_pfn(entry, pfn) {
- struct page *page = pfn_to_page(pfn);
+ if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
+ return NULL;
- if (page_ref_count(page) > 1)
- return page;
- }
- return NULL;
+ if (folio_ref_count(folio) - folio_mapcount(folio))
+ return &folio->page;
+ else
+ return NULL;
}
/**
@@ -785,7 +819,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
EXPORT_SYMBOL_GPL(dax_layout_busy_page);
static int __dax_invalidate_entry(struct address_space *mapping,
- pgoff_t index, bool trunc)
+ pgoff_t index, bool trunc)
{
XA_STATE(xas, &mapping->i_pages, index);
int ret = 0;
@@ -953,7 +987,8 @@ void dax_break_layout_final(struct inode *inode)
wait_page_idle_uninterruptible(page, inode);
} while (true);
- dax_delete_mapping_range(inode->i_mapping, 0, LLONG_MAX);
+ if (!page)
+ dax_delete_mapping_range(inode->i_mapping, 0, LLONG_MAX);
}
EXPORT_SYMBOL_GPL(dax_break_layout_final);
@@ -1039,8 +1074,10 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
void *old;
dax_disassociate_entry(entry, mapping, false);
- dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address,
- shared);
+ if (!(flags & DAX_ZERO_PAGE))
+ dax_associate_entry(new_entry, mapping, vmf->vma,
+ vmf->address, shared);
+
/*
* Only swap our new entry into the page cache if the current
* entry is a zero page or an empty entry. If a normal PTE or
@@ -1228,9 +1265,7 @@ static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
goto out;
if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
goto out;
- /* For larger pages we need devmap */
- if (length > 1 && !pfn_t_devmap(*pfnp))
- goto out;
+
rc = 0;
out_check_addr:
@@ -1337,7 +1372,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas, struct vm_fault *vmf,
*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, DAX_ZERO_PAGE);
- ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
+ ret = vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn), false);
trace_dax_load_hole(inode, vmf, ret);
return ret;
}
@@ -1808,7 +1843,8 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT;
bool write = iter->flags & IOMAP_WRITE;
unsigned long entry_flags = pmd ? DAX_PMD : 0;
- int err = 0;
+ struct folio *folio;
+ int ret, err = 0;
pfn_t pfn;
void *kaddr;
@@ -1840,17 +1876,19 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
return dax_fault_return(err);
}
+ folio = dax_to_folio(*entry);
if (dax_fault_is_synchronous(iter, vmf->vma))
return dax_fault_synchronous_pfnp(pfnp, pfn);
- /* insert PMD pfn */
+ folio_ref_inc(folio);
if (pmd)
- return vmf_insert_pfn_pmd(vmf, pfn, write);
+ ret = vmf_insert_folio_pmd(vmf, pfn_folio(pfn_t_to_pfn(pfn)),
+ write);
+ else
+ ret = vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn), write);
+ folio_put(folio);
- /* insert PTE pfn */
- if (write)
- return vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
- return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
+ return ret;
}
static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
@@ -2089,6 +2127,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
+ struct folio *folio;
void *entry;
vm_fault_t ret;
@@ -2106,14 +2145,17 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
dax_lock_entry(&xas, entry);
xas_unlock_irq(&xas);
+ folio = pfn_folio(pfn_t_to_pfn(pfn));
+ folio_ref_inc(folio);
if (order == 0)
- ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
+ ret = vmf_insert_page_mkwrite(vmf, &folio->page, true);
#ifdef CONFIG_FS_DAX_PMD
else if (order == PMD_ORDER)
- ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
+ ret = vmf_insert_folio_pmd(vmf, folio, FAULT_FLAG_WRITE);
#endif
else
ret = VM_FAULT_FALLBACK;
+ folio_put(folio);
dax_unlock_entry(&xas, entry);
trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
return ret;