From 454e2398be9b9fa30433fccc548db34d19aa9958 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Jun 2006 02:02:57 -0700 Subject: [PATCH] VFS: Permit filesystem to override root dentry on mount Extend the get_sb() filesystem operation to take an extra argument that permits the VFS to pass in the target vfsmount that defines the mountpoint. The filesystem is then required to manually set the superblock and root dentry pointers. For most filesystems, this should be done with simple_set_mnt() which will set the superblock pointer and then set the root dentry to the superblock's s_root (as per the old default behaviour). The get_sb() op now returns an integer as there's now no need to return the superblock pointer. This patch permits a superblock to be implicitly shared amongst several mount points, such as can be done with NFS to avoid potential inode aliasing. In such a case, simple_set_mnt() would not be called, and instead the mnt_root and mnt_sb would be set directly. The patch also makes the following changes: (*) the get_sb_*() convenience functions in the core kernel now take a vfsmount pointer argument and return an integer, so most filesystems have to change very little. (*) If one of the convenience function is not used, then get_sb() should normally call simple_set_mnt() to instantiate the vfsmount. This will always return 0, and so can be tail-called from get_sb(). (*) generic_shutdown_super() now calls shrink_dcache_sb() to clean up the dcache upon superblock destruction rather than shrink_dcache_anon(). This is required because the superblock may now have multiple trees that aren't actually bound to s_root, but that still need to be cleaned up. The currently called functions assume that the whole tree is rooted at s_root, and that anonymous dentries are not the roots of trees which results in dentries being left unculled. However, with the way NFS superblock sharing are currently set to be implemented, these assumptions are violated: the root of the filesystem is simply a dummy dentry and inode (the real inode for '/' may well be inaccessible), and all the vfsmounts are rooted on anonymous[*] dentries with child trees. [*] Anonymous until discovered from another tree. (*) The documentation has been adjusted, including the additional bit of changing ext2_* into foo_* in the documentation. [akpm@osdl.org: convert ipath_fs, do other stuff] Signed-off-by: David Howells Acked-by: Al Viro Cc: Nathan Scott Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/hugetlbfs') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 3a5b4e923455..4665c26171f7 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -723,10 +723,10 @@ void hugetlb_put_quota(struct address_space *mapping) } } -static struct super_block *hugetlbfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int hugetlbfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super); + return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt); } static struct file_system_type hugetlbfs_fs_type = { -- cgit v1.2.3 From 726c334223180e3c0197cc980a432681370d4baf Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Jun 2006 02:02:58 -0700 Subject: [PATCH] VFS: Permit filesystem to perform statfs with a known root dentry Give the statfs superblock operation a dentry pointer rather than a superblock pointer. This complements the get_sb() patch. That reduced the significance of sb->s_root, allowing NFS to place a fake root there. However, NFS does require a dentry to use as a target for the statfs operation. This permits the root in the vfsmount to be used instead. linux/mount.h has been added where necessary to make allyesconfig build successfully. Interest has also been expressed for use with the FUSE and XFS filesystems. Signed-off-by: David Howells Acked-by: Al Viro Cc: Nathan Scott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/hugetlbfs') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 4665c26171f7..678fc72c3646 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -467,9 +467,9 @@ static int hugetlbfs_set_page_dirty(struct page *page) return 0; } -static int hugetlbfs_statfs(struct super_block *sb, struct kstatfs *buf) +static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb); + struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); buf->f_type = HUGETLBFS_MAGIC; buf->f_bsize = HPAGE_SIZE; -- cgit v1.2.3 From a43a8c39bbb493c9e93f6764b350de2e33e18e92 Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Fri, 23 Jun 2006 02:03:15 -0700 Subject: [PATCH] tightening hugetlb strict accounting Current hugetlb strict accounting for shared mapping always assume mapping starts at zero file offset and reserves pages between zero and size of the file. This assumption often reserves (or lock down) a lot more pages then necessary if application maps at none zero file offset. libhugetlbfs is one example that requires proper reservation on shared mapping starts at none zero offset. This patch extends the reservation and hugetlb strict accounting to support any arbitrary pair of (offset, len), resulting a much more robust and accurate scheme. More importantly, it won't lock down any hugetlb pages outside file mapping. Signed-off-by: Ken Chen Acked-by: Adam Litke Cc: David Gibson Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'fs/hugetlbfs') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 678fc72c3646..e6410d8edd0e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -59,7 +59,6 @@ static void huge_pagevec_release(struct pagevec *pvec) static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file->f_dentry->d_inode; - struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); loff_t len, vma_len; int ret; @@ -87,9 +86,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size) goto out; - if (vma->vm_flags & VM_MAYSHARE) - if (hugetlb_extend_reservation(info, len >> HPAGE_SHIFT) != 0) - goto out; + if (vma->vm_flags & VM_MAYSHARE && + hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), + len >> HPAGE_SHIFT)) + goto out; ret = 0; hugetlb_prefault_arch_hook(vma->vm_mm); @@ -195,12 +195,8 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart) const pgoff_t start = lstart >> HPAGE_SHIFT; struct pagevec pvec; pgoff_t next; - int i; + int i, freed = 0; - hugetlb_truncate_reservation(HUGETLBFS_I(inode), - lstart >> HPAGE_SHIFT); - if (!mapping->nrpages) - return; pagevec_init(&pvec, 0); next = start; while (1) { @@ -221,10 +217,12 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart) truncate_huge_page(page); unlock_page(page); hugetlb_put_quota(mapping); + freed++; } huge_pagevec_release(&pvec); } BUG_ON(!lstart && mapping->nrpages); + hugetlb_unreserve_pages(inode, start, freed); } static void hugetlbfs_delete_inode(struct inode *inode) @@ -366,6 +364,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + INIT_LIST_HEAD(&inode->i_mapping->private_list); info = HUGETLBFS_I(inode); mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL); switch (mode & S_IFMT) { @@ -538,7 +537,6 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) hugetlbfs_inc_free_inodes(sbinfo); return NULL; } - p->prereserved_hpages = 0; return &p->vfs_inode; } @@ -781,8 +779,7 @@ struct file *hugetlb_zero_setup(size_t size) goto out_file; error = -ENOMEM; - if (hugetlb_extend_reservation(HUGETLBFS_I(inode), - size >> HPAGE_SHIFT) != 0) + if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT)) goto out_inode; d_instantiate(dentry, inode); -- cgit v1.2.3