summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Piggin <npiggin@suse.de>2009-01-04 12:00:53 -0800
committerGreg Kroah-Hartman <gregkh@suse.de>2009-01-18 10:35:43 -0800
commit8f0346e65d22136372207b6507bc646f9efb2804 (patch)
tree27a31a4376fb14c72fa702a27d6f5075662fd440
parentc9bb99e4921c92adfbfee5c17b2bfdd512da33c7 (diff)
fs: symlink write_begin allocation context fix
commit 54566b2c1594c2326a645a3551f9d989f7ba3c5e upstream. With the write_begin/write_end aops, page_symlink was broken because it could no longer pass a GFP_NOFS type mask into the point where the allocations happened. They are done in write_begin, which would always assume that the filesystem can be entered from reclaim. This bug could cause filesystem deadlocks. The funny thing with having a gfp_t mask there is that it doesn't really allow the caller to arbitrarily tinker with the context in which it can be called. It couldn't ever be GFP_ATOMIC, for example, because it needs to take the page lock. The only thing any callers care about is __GFP_FS anyway, so turn that into a single flag. Add a new flag for write_begin, AOP_FLAG_NOFS. Filesystems can now act on this flag in their write_begin function. Change __grab_cache_page to accept a nofs argument as well, to honour that flag (while we're there, change the name to grab_cache_page_write_begin which is more instructive and does away with random leading underscores). This is really a more flexible way to go in the end anyway -- if a filesystem happens to want any extra allocations aside from the pagecache ones in ints write_begin function, it may now use GFP_KERNEL (rather than GFP_NOFS) for common case allocations (eg. ocfs2_alloc_write_ctxt, for a random example). [kosaki.motohiro@jp.fujitsu.com: fix ubifs] [kosaki.motohiro@jp.fujitsu.com: fix fuse] Signed-off-by: Nick Piggin <npiggin@suse.de> Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: <stable@kernel.org> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> [ Cleaned up the calling convention: just pass in the AOP flags untouched to the grab_cache_page_write_begin() function. That just simplifies everybody, and may even allow future expansion of the logic. - Linus ] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r--fs/affs/file.c2
-rw-r--r--fs/buffer.c4
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext3/namei.c3
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/ext4/namei.c3
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/gfs2/ops_address.c2
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/libfs.c2
-rw-r--r--fs/namei.c13
-rw-r--r--fs/nfs/file.c2
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/smbfs/file.c2
-rw-r--r--fs/ubifs/file.c9
-rw-r--r--include/linux/fs.h5
-rw-r--r--include/linux/pagemap.h3
-rw-r--r--mm/filemap.c17
19 files changed, 48 insertions, 35 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 1377b1240b6e..9246cb4aa018 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -628,7 +628,7 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
}
index = pos >> PAGE_CACHE_SHIFT;
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
*pagep = page;
diff --git a/fs/buffer.c b/fs/buffer.c
index 95a165d3cfe1..a542f970737e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1988,7 +1988,7 @@ int block_write_begin(struct file *file, struct address_space *mapping,
page = *pagep;
if (page == NULL) {
ownpage = 1;
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
status = -ENOMEM;
goto out;
@@ -2494,7 +2494,7 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
*pagep = page;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 507d8689b111..c5f40a05bcae 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1152,7 +1152,7 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
to = from + len;
retry:
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
*pagep = page;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index de13e919cd81..1f5538263b62 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2187,8 +2187,7 @@ retry:
* We have a transaction open. All is sweetness. It also sets
* i_size in generic_commit_write().
*/
- err = __page_symlink(inode, symname, l,
- mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+ err = __page_symlink(inode, symname, l, 1);
if (err) {
drop_nlink(inode);
ext3_mark_inode_dirty(handle, inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 846a79096c25..d77f674d393d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1370,7 +1370,7 @@ retry:
goto out;
}
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
ext4_journal_stop(handle);
ret = -ENOMEM;
@@ -2421,7 +2421,7 @@ retry:
goto out;
}
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
ext4_journal_stop(handle);
ret = -ENOMEM;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 387ad98350c3..d626533f3c1a 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2216,8 +2216,7 @@ retry:
* We have a transaction open. All is sweetness. It also sets
* i_size in generic_commit_write().
*/
- err = __page_symlink(inode, symname, l,
- mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+ err = __page_symlink(inode, symname, l, 1);
if (err) {
clear_nlink(inode);
ext4_mark_inode_dirty(handle, inode);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2bada6bbc317..c8206dbc1d8d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -644,7 +644,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
{
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
- *pagep = __grab_cache_page(mapping, index);
+ *pagep = grab_cache_page_write_begin(mapping, index, flags);
if (!*pagep)
return -ENOMEM;
return 0;
@@ -777,7 +777,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
break;
err = -ENOMEM;
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, 0);
if (!page)
break;
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index e64a1b04117a..c75df0624b59 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -675,7 +675,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
goto out_trans_fail;
error = -ENOMEM;
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
*pagep = page;
if (unlikely(!page))
goto out_endtrans;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index d6ecabf4d231..5e17cae9fe2c 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -500,7 +500,7 @@ int hostfs_write_begin(struct file *file, struct address_space *mapping,
{
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
- *pagep = __grab_cache_page(mapping, index);
+ *pagep = grab_cache_page_write_begin(mapping, index, flags);
if (!*pagep)
return -ENOMEM;
return 0;
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 5a98aa87c853..5edc2bf20581 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -132,7 +132,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
uint32_t pageofs = index << PAGE_CACHE_SHIFT;
int ret = 0;
- pg = __grab_cache_page(mapping, index);
+ pg = grab_cache_page_write_begin(mapping, index, flags);
if (!pg)
return -ENOMEM;
*pagep = pg;
diff --git a/fs/libfs.c b/fs/libfs.c
index 1add676a19df..8fc2407d2fe6 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -360,7 +360,7 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
index = pos >> PAGE_CACHE_SHIFT;
from = pos & (PAGE_CACHE_SIZE - 1);
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
diff --git a/fs/namei.c b/fs/namei.c
index 50c21930c357..2259d21ae281 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2796,18 +2796,23 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
}
}
-int __page_symlink(struct inode *inode, const char *symname, int len,
- gfp_t gfp_mask)
+/*
+ * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
+ */
+int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
void *fsdata;
int err;
char *kaddr;
+ unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE;
+ if (nofs)
+ flags |= AOP_FLAG_NOFS;
retry:
err = pagecache_write_begin(NULL, mapping, 0, len-1,
- AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
+ flags, &page, &fsdata);
if (err)
goto fail;
@@ -2831,7 +2836,7 @@ fail:
int page_symlink(struct inode *inode, const char *symname, int len)
{
return __page_symlink(inode, symname, len,
- mapping_gfp_mask(inode->i_mapping));
+ !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS));
}
const struct inode_operations page_symlink_inode_operations = {
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 78460657f5cb..30541f0ea392 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -351,7 +351,7 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
file->f_path.dentry->d_name.name,
mapping->host->i_ino, len, (long long) pos);
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
*pagep = page;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 5699171212ae..8c2615e802cc 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2565,7 +2565,7 @@ static int reiserfs_write_begin(struct file *file,
}
index = pos >> PAGE_CACHE_SHIFT;
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
*pagep = page;
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index e4f8d51a5553..92d5e8ffb639 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -297,7 +297,7 @@ static int smb_write_begin(struct file *file, struct address_space *mapping,
struct page **pagep, void **fsdata)
{
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
- *pagep = __grab_cache_page(mapping, index);
+ *pagep = grab_cache_page_write_begin(mapping, index, flags);
if (!*pagep)
return -ENOMEM;
return 0;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 3d698e2022b1..40033dccbd93 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -213,7 +213,8 @@ static void release_existing_page_budget(struct ubifs_info *c)
}
static int write_begin_slow(struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep)
+ loff_t pos, unsigned len, struct page **pagep,
+ unsigned flags)
{
struct inode *inode = mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -241,7 +242,7 @@ static int write_begin_slow(struct address_space *mapping,
if (unlikely(err))
return err;
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (unlikely(!page)) {
ubifs_release_budget(c, &req);
return -ENOMEM;
@@ -432,7 +433,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
return -EROFS;
/* Try out the fast-path part first */
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (unlikely(!page))
return -ENOMEM;
@@ -477,7 +478,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
unlock_page(page);
page_cache_release(page);
- return write_begin_slow(mapping, pos, len, pagep);
+ return write_begin_slow(mapping, pos, len, pagep, flags);
}
/*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 580b513668fe..d621217149f9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -403,6 +403,9 @@ enum positive_aop_returns {
#define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */
#define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */
+#define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct
+ * helper code (eg buffer layer)
+ * to clear GFP_FS from alloc */
/*
* oh the beauties of C type declarations.
@@ -1959,7 +1962,7 @@ extern int page_readlink(struct dentry *, char __user *, int);
extern void *page_follow_link_light(struct dentry *, struct nameidata *);
extern void page_put_link(struct dentry *, struct nameidata *, void *);
extern int __page_symlink(struct inode *inode, const char *symname, int len,
- gfp_t gfp_mask);
+ int nofs);
extern int page_symlink(struct inode *inode, const char *symname, int len);
extern const struct inode_operations page_symlink_inode_operations;
extern int generic_readlink(struct dentry *, char __user *, int);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 5da31c12101c..62efcebf8760 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -213,7 +213,8 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
int tag, unsigned int nr_pages, struct page **pages);
-struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index);
+struct page *grab_cache_page_write_begin(struct address_space *mapping,
+ pgoff_t index, unsigned flags);
/*
* Returns locked page at given index in given cache, creating it if needed.
diff --git a/mm/filemap.c b/mm/filemap.c
index 1cd0b87861bb..f3033d0f5e51 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2033,7 +2033,7 @@ int pagecache_write_begin(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
struct page *page;
again:
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
*pagep = page;
if (!page)
return -ENOMEM;
@@ -2197,19 +2197,24 @@ EXPORT_SYMBOL(generic_file_direct_write);
* Find or create a page at the given pagecache position. Return the locked
* page. This function is specifically for buffered writes.
*/
-struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index)
+struct page *grab_cache_page_write_begin(struct address_space *mapping,
+ pgoff_t index, unsigned flags)
{
int status;
struct page *page;
+ gfp_t gfp_notmask = 0;
+ if (flags & AOP_FLAG_NOFS)
+ gfp_notmask = __GFP_FS;
repeat:
page = find_lock_page(mapping, index);
if (likely(page))
return page;
- page = page_cache_alloc(mapping);
+ page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);
if (!page)
return NULL;
- status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+ status = add_to_page_cache_lru(page, mapping, index,
+ GFP_KERNEL & ~gfp_notmask);
if (unlikely(status)) {
page_cache_release(page);
if (status == -EEXIST)
@@ -2218,7 +2223,7 @@ repeat:
}
return page;
}
-EXPORT_SYMBOL(__grab_cache_page);
+EXPORT_SYMBOL(grab_cache_page_write_begin);
static ssize_t generic_perform_write_2copy(struct file *file,
struct iov_iter *i, loff_t pos)
@@ -2263,7 +2268,7 @@ static ssize_t generic_perform_write_2copy(struct file *file,
break;
}
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, 0);
if (!page) {
status = -ENOMEM;
break;