From 978b7237123d007b9fa983af6e0e2fa8f97f9934 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Mon, 19 May 2008 16:29:46 +1000 Subject: [XFS] Fix fsync() b0rkage. xfs_fsync() fails to wait for data I/O completion before checking if the inode is dirty or clean to decide whether to log the inode or not. This misses inode size updates when the data flushed by the fsync() is extending the file. Hence, like fdatasync(), we need to wait for I/o completion first, then check the inode for cleanliness. Doing so makes the behaviour of xfs_fsync() identical for fsync and fdatasync and we *always* use synchronous semantics if the inode is dirty. Therefore also kill the differences and remove the unused flags from the xfs_fsync function and callers. SGI-PV: 981296 SGI-Modid: xfs-linux-melb:xfs-kern:31033a Signed-off-by: David Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_file.c | 17 ++++--- fs/xfs/linux-2.6/xfs_vnode.h | 8 ---- fs/xfs/xfs_vnodeops.c | 112 ++++++++++++++++--------------------------- fs/xfs/xfs_vnodeops.h | 3 +- 4 files changed, 54 insertions(+), 86 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 65e78c13d4ae..5f60363b9343 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -184,19 +184,24 @@ xfs_file_release( return -xfs_release(XFS_I(inode)); } +/* + * We ignore the datasync flag here because a datasync is effectively + * identical to an fsync. That is, datasync implies that we need to write + * only the metadata needed to be able to access the data that is written + * if we crash after the call completes. Hence if we are writing beyond + * EOF we have to log the inode size change as well, which makes it a + * full fsync. If we don't write beyond EOF, the inode core will be + * clean in memory and so we don't need to log the inode, just like + * fsync. + */ STATIC int xfs_file_fsync( struct file *filp, struct dentry *dentry, int datasync) { - int flags = FSYNC_WAIT; - - if (datasync) - flags |= FSYNC_DATA; xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED); - return -xfs_fsync(XFS_I(dentry->d_inode), flags, - (xfs_off_t)0, (xfs_off_t)-1); + return -xfs_fsync(XFS_I(dentry->d_inode)); } /* diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 9d73cb5c0fc7..25eb2a9e8d9b 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -229,14 +229,6 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) #define ATTR_NOLOCK 0x200 /* Don't grab any conflicting locks */ #define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */ -/* - * Flags to vop_fsync/reclaim. - */ -#define FSYNC_NOWAIT 0 /* asynchronous flush */ -#define FSYNC_WAIT 0x1 /* synchronous fsync or forced reclaim */ -#define FSYNC_INVAL 0x2 /* flush and invalidate cached data */ -#define FSYNC_DATA 0x4 /* synchronous fsync of data only */ - /* * Tracking vnode activity. */ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 70702a60b4bb..e475e3717eb3 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -856,18 +856,14 @@ xfs_readlink( /* * xfs_fsync * - * This is called to sync the inode and its data out to disk. - * We need to hold the I/O lock while flushing the data, and - * the inode lock while flushing the inode. The inode lock CANNOT - * be held while flushing the data, so acquire after we're done - * with that. + * This is called to sync the inode and its data out to disk. We need to hold + * the I/O lock while flushing the data, and the inode lock while flushing the + * inode. The inode lock CANNOT be held while flushing the data, so acquire + * after we're done with that. */ int xfs_fsync( - xfs_inode_t *ip, - int flag, - xfs_off_t start, - xfs_off_t stop) + xfs_inode_t *ip) { xfs_trans_t *tp; int error; @@ -875,103 +871,79 @@ xfs_fsync( xfs_itrace_entry(ip); - ASSERT(start >= 0 && stop >= -1); - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); - if (flag & FSYNC_DATA) - filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); + /* capture size updates in I/O completion before writing the inode. */ + error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); + if (error) + return XFS_ERROR(error); /* - * We always need to make sure that the required inode state - * is safe on disk. The vnode might be clean but because - * of committed transactions that haven't hit the disk yet. - * Likewise, there could be unflushed non-transactional - * changes to the inode core that have to go to disk. + * We always need to make sure that the required inode state is safe on + * disk. The vnode might be clean but we still might need to force the + * log because of committed transactions that haven't hit the disk yet. + * Likewise, there could be unflushed non-transactional changes to the + * inode core that have to go to disk and this requires us to issue + * a synchronous transaction to capture these changes correctly. * - * The following code depends on one assumption: that - * any transaction that changes an inode logs the core - * because it has to change some field in the inode core - * (typically nextents or nblocks). That assumption - * implies that any transactions against an inode will - * catch any non-transactional updates. If inode-altering - * transactions exist that violate this assumption, the - * code breaks. Right now, it figures that if the involved - * update_* field is clear and the inode is unpinned, the - * inode is clean. Either it's been flushed or it's been - * committed and the commit has hit the disk unpinning the inode. - * (Note that xfs_inode_item_format() called at commit clears - * the update_* fields.) + * This code relies on the assumption that if the update_* fields + * of the inode are clear and the inode is unpinned then it is clean + * and no action is required. */ xfs_ilock(ip, XFS_ILOCK_SHARED); - /* If we are flushing data then we care about update_size - * being set, otherwise we care about update_core - */ - if ((flag & FSYNC_DATA) ? - (ip->i_update_size == 0) : - (ip->i_update_core == 0)) { + if (!(ip->i_update_size || ip->i_update_core)) { /* - * Timestamps/size haven't changed since last inode - * flush or inode transaction commit. That means - * either nothing got written or a transaction - * committed which caught the updates. If the - * latter happened and the transaction hasn't - * hit the disk yet, the inode will be still - * be pinned. If it is, force the log. + * Timestamps/size haven't changed since last inode flush or + * inode transaction commit. That means either nothing got + * written or a transaction committed which caught the updates. + * If the latter happened and the transaction hasn't hit the + * disk yet, the inode will be still be pinned. If it is, + * force the log. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) { - _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, - XFS_LOG_FORCE | - ((flag & FSYNC_WAIT) - ? XFS_LOG_SYNC : 0), + error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, + XFS_LOG_FORCE | XFS_LOG_SYNC, &log_flushed); } else { /* - * If the inode is not pinned and nothing - * has changed we don't need to flush the - * cache. + * If the inode is not pinned and nothing has changed + * we don't need to flush the cache. */ changed = 0; } - error = 0; } else { /* - * Kick off a transaction to log the inode - * core to get the updates. Make it - * sync if FSYNC_WAIT is passed in (which - * is done by everybody but specfs). The - * sync transaction will also force the log. + * Kick off a transaction to log the inode core to get the + * updates. The sync transaction will also force the log. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); - if ((error = xfs_trans_reserve(tp, 0, - XFS_FSYNC_TS_LOG_RES(ip->i_mount), - 0, 0, 0))) { + error = xfs_trans_reserve(tp, 0, + XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); + if (error) { xfs_trans_cancel(tp, 0); return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); /* - * Note - it's possible that we might have pushed - * ourselves out of the way during trans_reserve - * which would flush the inode. But there's no - * guarantee that the inode buffer has actually - * gone out yet (it's delwri). Plus the buffer - * could be pinned anyway if it's part of an - * inode in another recent transaction. So we - * play it safe and fire off the transaction anyway. + * Note - it's possible that we might have pushed ourselves out + * of the way during trans_reserve which would flush the inode. + * But there's no guarantee that the inode buffer has actually + * gone out yet (it's delwri). Plus the buffer could be pinned + * anyway if it's part of an inode in another recent + * transaction. So we play it safe and fire off the + * transaction anyway. */ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (flag & FSYNC_WAIT) - xfs_trans_set_sync(tp); + xfs_trans_set_sync(tp); error = _xfs_trans_commit(tp, 0, &log_flushed); xfs_iunlock(ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 8abe8f186e20..57335ba4ce53 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -18,8 +18,7 @@ int xfs_open(struct xfs_inode *ip); int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, struct cred *credp); int xfs_readlink(struct xfs_inode *ip, char *link); -int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start, - xfs_off_t stop); +int xfs_fsync(struct xfs_inode *ip); int xfs_release(struct xfs_inode *ip); int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, -- cgit v1.2.3 From 49383b0e98ad1f69ff4c816eb1961f703df12318 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Mon, 19 May 2008 16:29:34 +1000 Subject: [XFS] Don't allow memory reclaim to wait on the filesystem in inode writeback If we allow memory reclaim to wait on the pages under writeback in inode cluster writeback we could deadlock because we are currently holding the ILOCK on the initial writeback inode which is needed in data I/O completion to change the file size or do unwritten extent conversion before the pages are taken out of writeback state. SGI-PV: 981091 SGI-Modid: xfs-linux-melb:xfs-kern:31015a Signed-off-by: David Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index cf0bb9c1d621..739ea45a9d10 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2986,7 +2986,7 @@ xfs_iflush_cluster( ASSERT(pag->pag_ici_init); ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *); - ilist = kmem_alloc(ilist_size, KM_MAYFAIL); + ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); if (!ilist) return 0; -- cgit v1.2.3 From c8f5f12e46f079a954d4f7163ba59dadee08ca26 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Tue, 20 May 2008 11:30:15 +1000 Subject: [XFS] Fix inode list allocation size in writeback. We only need to allocate space for the number of inodes in the cluster when writing back inodes, not every byte in the inode cluster. This reduces the amount of memory needing to be allocated to 256 bytes instead of 64k. SGI-PV: 981949 SGI-Modid: xfs-linux-melb:xfs-kern:31182a Signed-off-by: David Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_inode.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 739ea45a9d10..e569bf5d6cf0 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2974,6 +2974,7 @@ xfs_iflush_cluster( xfs_mount_t *mp = ip->i_mount; xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); unsigned long first_index, mask; + unsigned long inodes_per_cluster; int ilist_size; xfs_inode_t **ilist; xfs_inode_t *iq; @@ -2985,7 +2986,8 @@ xfs_iflush_cluster( ASSERT(pag->pagi_inodeok); ASSERT(pag->pag_ici_init); - ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *); + inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; + ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); if (!ilist) return 0; @@ -2995,8 +2997,7 @@ xfs_iflush_cluster( read_lock(&pag->pag_ici_lock); /* really need a gang lookup range call here */ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, - first_index, - XFS_INODE_CLUSTER_SIZE(mp)); + first_index, inodes_per_cluster); if (nr_found == 0) goto out_free; -- cgit v1.2.3 From 6ab455eeaff6893cd06da33843e840d888cdc04a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 May 2008 16:34:42 +1000 Subject: [XFS] Fix memory corruption with small buffer reads When we have multiple buffers in a single page for a blocksize == pagesize filesystem we might overwrite the page contents if two callers hit it shortly after each other. To prevent that we need to keep the page locked until I/O is completed and the page marked uptodate. Thanks to Eric Sandeen for triaging this bug and finding a reproducible testcase and Dave Chinner for additional advice. This should fix kernel.org bz #10421. Tested-by: Eric Sandeen SGI-PV: 981813 SGI-Modid: xfs-linux-melb:xfs-kern:31173a Signed-off-by: Christoph Hellwig Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_buf.c | 24 ++++++++++++++++++++---- fs/xfs/linux-2.6/xfs_buf.h | 19 +++++++++++++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 5105015a75ad..98e0e86093b4 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -387,6 +387,8 @@ _xfs_buf_lookup_pages( if (unlikely(page == NULL)) { if (flags & XBF_READ_AHEAD) { bp->b_page_count = i; + for (i = 0; i < bp->b_page_count; i++) + unlock_page(bp->b_pages[i]); return -ENOMEM; } @@ -416,17 +418,24 @@ _xfs_buf_lookup_pages( ASSERT(!PagePrivate(page)); if (!PageUptodate(page)) { page_count--; - if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) { + if (blocksize >= PAGE_CACHE_SIZE) { + if (flags & XBF_READ) + bp->b_flags |= _XBF_PAGE_LOCKED; + } else if (!PagePrivate(page)) { if (test_page_region(page, offset, nbytes)) page_count++; } } - unlock_page(page); bp->b_pages[i] = page; offset = 0; } + if (!(bp->b_flags & _XBF_PAGE_LOCKED)) { + for (i = 0; i < bp->b_page_count; i++) + unlock_page(bp->b_pages[i]); + } + if (page_count == bp->b_page_count) bp->b_flags |= XBF_DONE; @@ -746,6 +755,7 @@ xfs_buf_associate_memory( bp->b_count_desired = len; bp->b_buffer_length = buflen; bp->b_flags |= XBF_MAPPED; + bp->b_flags &= ~_XBF_PAGE_LOCKED; return 0; } @@ -1093,8 +1103,10 @@ _xfs_buf_ioend( xfs_buf_t *bp, int schedule) { - if (atomic_dec_and_test(&bp->b_io_remaining) == 1) + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { + bp->b_flags &= ~_XBF_PAGE_LOCKED; xfs_buf_ioend(bp, schedule); + } } STATIC void @@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io( if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); + + if (bp->b_flags & _XBF_PAGE_LOCKED) + unlock_page(page); } while (bvec >= bio->bi_io_vec); _xfs_buf_ioend(bp, 1); @@ -1163,7 +1178,8 @@ _xfs_buf_ioapply( * filesystem block size is not smaller than the page size. */ if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && - (bp->b_flags & XBF_READ) && + ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) == + (XBF_READ|_XBF_PAGE_LOCKED)) && (blocksize >= PAGE_CACHE_SIZE)) { bio = bio_alloc(GFP_NOIO, 1); diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 841d7883528d..f948ec7ba9a4 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -66,6 +66,25 @@ typedef enum { _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ + + /* + * Special flag for supporting metadata blocks smaller than a FSB. + * + * In this case we can have multiple xfs_buf_t on a single page and + * need to lock out concurrent xfs_buf_t readers as they only + * serialise access to the buffer. + * + * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation + * between reads of the page. Hence we can have one thread read the + * page and modify it, but then race with another thread that thinks + * the page is not up-to-date and hence reads it again. + * + * The result is that the first modifcation to the page is lost. + * This sort of AGF/AGI reading race can happen when unlinking inodes + * that require truncation and results in the AGI unlinked list + * modifications being lost. + */ + _XBF_PAGE_LOCKED = (1 << 22), } xfs_buf_flags_t; typedef enum { -- cgit v1.2.3 From aaa9bbe039febf1d3a0f3a374deea0680d9f5758 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 23 May 2008 17:38:32 +0000 Subject: [CIFS] remove unused variables CC: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 +-- fs/cifs/cifssmb.c | 6 ++---- fs/cifs/file.c | 7 ------- fs/cifs/misc.c | 3 +-- fs/cifs/readdir.c | 6 +----- 5 files changed, 5 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 08914053242b..9cfcf326ead3 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -333,7 +333,6 @@ struct cifsFileInfo { bool messageMode:1; /* for pipes: message vs byte mode */ atomic_t wrtPending; /* handle in use - defer close */ struct semaphore fh_sem; /* prevents reopen race after dead ses*/ - char *search_resume_name; /* BB removeme BB */ struct cifs_search_info srch_inf; }; @@ -626,7 +625,7 @@ GLOBAL_EXTERN atomic_t tcpSesAllocCount; GLOBAL_EXTERN atomic_t tcpSesReconnectCount; GLOBAL_EXTERN atomic_t tconInfoReconnectCount; -/* Various Debug counters to remove someday (BB) */ +/* Various Debug counters */ GLOBAL_EXTERN atomic_t bufAllocCount; /* current number allocated */ #ifdef CONFIG_CIFS_STATS2 GLOBAL_EXTERN atomic_t totBufAllocCount; /* total allocated over all time */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 9b8b4cfdf993..174bf8aca237 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1728,7 +1728,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, { int rc = 0; LOCK_REQ *pSMB = NULL; - LOCK_RSP *pSMBr = NULL; +/* LOCK_RSP *pSMBr = NULL; */ /* No response data other than rc to parse */ int bytes_returned; int timeout = 0; __u16 count; @@ -1739,8 +1739,6 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, if (rc) return rc; - pSMBr = (LOCK_RSP *)pSMB; /* BB removeme BB */ - if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) { timeout = CIFS_ASYNC_OP; /* no response expected */ pSMB->Timeout = 0; @@ -1774,7 +1772,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, if (waitFlag) { rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMBr, &bytes_returned); + (struct smb_hdr *) pSMB, &bytes_returned); cifs_small_buf_release(pSMB); } else { rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *)pSMB, diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8636cec2642c..0aac824371a5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -546,7 +546,6 @@ int cifs_close(struct inode *inode, struct file *file) msleep(timeout); timeout *= 8; } - kfree(pSMBFile->search_resume_name); kfree(file->private_data); file->private_data = NULL; } else @@ -605,12 +604,6 @@ int cifs_closedir(struct inode *inode, struct file *file) else cifs_buf_release(ptmp); } - ptmp = pCFileStruct->search_resume_name; - if (ptmp) { - cFYI(1, ("closedir free resume name")); - pCFileStruct->search_resume_name = NULL; - kfree(ptmp); - } kfree(file->private_data); file->private_data = NULL; } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1d69b8014e0b..4b17f8fe3157 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -519,8 +519,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) pnotify = (struct file_notify_information *) ((char *)&pSMBr->hdr.Protocol + data_offset); cFYI(1, ("dnotify on %s Action: 0x%x", - pnotify->FileName, - pnotify->Action)); /* BB removeme BB */ + pnotify->FileName, pnotify->Action)); /* cifs_dump_mem("Rcvd notify Data: ",buf, sizeof(struct smb_hdr)+60); */ return true; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 713c25110197..df2c3c466ee1 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -675,8 +675,6 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, cifsFile->invalidHandle = true; CIFSFindClose(xid, pTcon, cifsFile->netfid); } - kfree(cifsFile->search_resume_name); - cifsFile->search_resume_name = NULL; if (cifsFile->srch_inf.ntwrk_buf_start) { cFYI(1, ("freeing SMB ff cache buf on search rewind")); if (cifsFile->srch_inf.smallBuf) @@ -1043,9 +1041,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) } /* else { cifsFile->invalidHandle = true; CIFSFindClose(xid, pTcon, cifsFile->netfid); - } - kfree(cifsFile->search_resume_name); - cifsFile->search_resume_name = NULL; */ + } */ rc = find_cifs_entry(xid, pTcon, file, ¤t_entry, &num_to_fill); -- cgit v1.2.3 From 4468eb3fd102cad559e51594a01cbc65b994d264 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 22 May 2008 09:31:40 -0400 Subject: on non-posix shares, clear write bits in mode when ATTR_READONLY is set When mounting a share with posix extensions disabled, cifs_get_inode_info turns off all the write bits in the mode for regular files if ATTR_READONLY is set. Directories and other inode types, however, can also have ATTR_READONLY set, but the mode gives no indication of this. This patch makes this apply to other inode types besides regular files. It also cleans up how modes are set in cifs_get_inode_info for both the "normal" and "dynperm" cases. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/inode.c | 76 ++++++++++++++++++++++++++----------------------------- fs/cifs/readdir.c | 71 ++++++++++++++++++++++++++++----------------------- 2 files changed, 75 insertions(+), 72 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 129dbfe4dca7..ae5bcaf2031c 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -418,6 +418,7 @@ int cifs_get_inode_info(struct inode **pinode, char *buf = NULL; bool adjustTZ = false; bool is_dfs_referral = false; + umode_t default_mode; pTcon = cifs_sb->tcon; cFYI(1, ("Getting info on %s", full_path)); @@ -530,47 +531,42 @@ int cifs_get_inode_info(struct inode **pinode, inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj; } - /* set default mode. will override for dirs below */ - if (atomic_read(&cifsInfo->inUse) == 0) - /* new inode, can safely set these fields */ - inode->i_mode = cifs_sb->mnt_file_mode; - else /* since we set the inode type below we need to mask off - to avoid strange results if type changes and both - get orred in */ - inode->i_mode &= ~S_IFMT; -/* if (attr & ATTR_REPARSE) */ - /* We no longer handle these as symlinks because we could not - follow them due to the absolute path with drive letter */ - if (attr & ATTR_DIRECTORY) { - /* override default perms since we do not do byte range locking - on dirs */ - inode->i_mode = cifs_sb->mnt_dir_mode; - inode->i_mode |= S_IFDIR; - } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && - (cifsInfo->cifsAttrs & ATTR_SYSTEM) && - /* No need to le64 convert size of zero */ - (pfindData->EndOfFile == 0)) { - inode->i_mode = cifs_sb->mnt_file_mode; - inode->i_mode |= S_IFIFO; -/* BB Finish for SFU style symlinks and devices */ - } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && - (cifsInfo->cifsAttrs & ATTR_SYSTEM)) { - if (decode_sfu_inode(inode, le64_to_cpu(pfindData->EndOfFile), - full_path, cifs_sb, xid)) - cFYI(1, ("Unrecognized sfu inode type")); - - cFYI(1, ("sfu mode 0%o", inode->i_mode)); + /* get default inode mode */ + if (attr & ATTR_DIRECTORY) + default_mode = cifs_sb->mnt_dir_mode; + else + default_mode = cifs_sb->mnt_file_mode; + + /* set permission bits */ + if (atomic_read(&cifsInfo->inUse) == 0 || + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) + inode->i_mode = default_mode; + else { + /* just reenable write bits if !ATTR_READONLY */ + if ((inode->i_mode & S_IWUGO) == 0 && + (attr & ATTR_READONLY) == 0) + inode->i_mode |= (S_IWUGO & default_mode); + inode->i_mode &= ~S_IFMT; + } + /* clear write bits if ATTR_READONLY is set */ + if (attr & ATTR_READONLY) + inode->i_mode &= ~S_IWUGO; + + /* set inode type */ + if ((attr & ATTR_SYSTEM) && + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) { + /* no need to fix endianness on 0 */ + if (pfindData->EndOfFile == 0) + inode->i_mode |= S_IFIFO; + else if (decode_sfu_inode(inode, + le64_to_cpu(pfindData->EndOfFile), + full_path, cifs_sb, xid)) + cFYI(1, ("unknown SFU file type\n")); } else { - inode->i_mode |= S_IFREG; - /* treat dos attribute of read-only as read-only mode eg 555 */ - if (cifsInfo->cifsAttrs & ATTR_READONLY) - inode->i_mode &= ~(S_IWUGO); - else if ((inode->i_mode & S_IWUGO) == 0) - /* the ATTR_READONLY flag may have been */ - /* changed on server -- set any w bits */ - /* allowed by mnt_file_mode */ - inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode); - /* BB add code to validate if device or weird share or device type? */ + if (attr & ATTR_DIRECTORY) + inode->i_mode |= S_IFDIR; + else + inode->i_mode |= S_IFREG; } spin_lock(&inode->i_lock); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index df2c3c466ee1..83f306954883 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -132,6 +132,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, __u32 attr; __u64 allocation_size; __u64 end_of_file; + umode_t default_mode; /* save mtime and size */ local_mtime = tmp_inode->i_mtime; @@ -187,48 +188,54 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, if (atomic_read(&cifsInfo->inUse) == 0) { tmp_inode->i_uid = cifs_sb->mnt_uid; tmp_inode->i_gid = cifs_sb->mnt_gid; - /* set default mode. will override for dirs below */ - tmp_inode->i_mode = cifs_sb->mnt_file_mode; - } else { - /* mask off the type bits since it gets set - below and we do not want to get two type - bits set */ + } + + if (attr & ATTR_DIRECTORY) + default_mode = cifs_sb->mnt_dir_mode; + else + default_mode = cifs_sb->mnt_file_mode; + + /* set initial permissions */ + if ((atomic_read(&cifsInfo->inUse) == 0) || + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) + tmp_inode->i_mode = default_mode; + else { + /* just reenable write bits if !ATTR_READONLY */ + if ((tmp_inode->i_mode & S_IWUGO) == 0 && + (attr & ATTR_READONLY) == 0) + tmp_inode->i_mode |= (S_IWUGO & default_mode); + tmp_inode->i_mode &= ~S_IFMT; } - if (attr & ATTR_DIRECTORY) { - *pobject_type = DT_DIR; - /* override default perms since we do not lock dirs */ - if (atomic_read(&cifsInfo->inUse) == 0) - tmp_inode->i_mode = cifs_sb->mnt_dir_mode; - tmp_inode->i_mode |= S_IFDIR; - } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && - (attr & ATTR_SYSTEM)) { + /* clear write bits if ATTR_READONLY is set */ + if (attr & ATTR_READONLY) + tmp_inode->i_mode &= ~S_IWUGO; + + /* set inode type */ + if ((attr & ATTR_SYSTEM) && + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) { if (end_of_file == 0) { - *pobject_type = DT_FIFO; tmp_inode->i_mode |= S_IFIFO; + *pobject_type = DT_FIFO; } else { - /* rather than get the type here, we mark the - inode as needing revalidate and get the real type - (blk vs chr vs. symlink) later ie in lookup */ - *pobject_type = DT_REG; + /* + * trying to get the type can be slow, so just call + * this a regular file for now, and mark for reval + */ tmp_inode->i_mode |= S_IFREG; + *pobject_type = DT_REG; cifsInfo->time = 0; } -/* we no longer mark these because we could not follow them */ -/* } else if (attr & ATTR_REPARSE) { - *pobject_type = DT_LNK; - tmp_inode->i_mode |= S_IFLNK; */ } else { - *pobject_type = DT_REG; - tmp_inode->i_mode |= S_IFREG; - if (attr & ATTR_READONLY) - tmp_inode->i_mode &= ~(S_IWUGO); - else if ((tmp_inode->i_mode & S_IWUGO) == 0) - /* the ATTR_READONLY flag may have been changed on */ - /* server -- set any w bits allowed by mnt_file_mode */ - tmp_inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode); - } /* could add code here - to validate if device or weird share type? */ + if (attr & ATTR_DIRECTORY) { + tmp_inode->i_mode |= S_IFDIR; + *pobject_type = DT_DIR; + } else { + tmp_inode->i_mode |= S_IFREG; + *pobject_type = DT_REG; + } + } /* can not fill in nlink here as in qpathinfo version and Unx search */ if (atomic_read(&cifsInfo->inUse) == 0) -- cgit v1.2.3 From b0fd30d3e7e768aad5e398caaea6ae5a5c814eab Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 22 May 2008 09:33:34 -0400 Subject: when creating new inodes, use file_mode/dir_mode exclusively on mount without unix extensions When CIFS creates a new inode on a mount without unix extensions, it temporarily assigns the mode that was passed to it in the create/mkdir call. Eventually, when the inode is revalidated, it changes to have the file_mode or dir_mode for the mount. This is confusing to users who expect that the mode shouldn't change this way. It's also problematic since only the mode is treated this way, not the uid or gid. Suppose you have a CIFS mount that's mounted with: uid=0,gid=0,file_mode=0666,dir_mode=0777 ...if an unprivileged user comes along and does this on the mount: mkdir -m 0700 foo touch foo/bar ...there is a period of time where the touch will fail, since the dir will initially be owned by root and have mode 0700. If the user waits long enough, then "foo" will be revalidated and will get the correct dir_mode permissions. This patch changes cifs_mkdir and cifs_create to not overwrite the mode found by the initial cifs_get_inode_info call after the inode is created on the server. Legacy behavior can be reenabled with the new "dynperm" mount option. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/dir.c | 4 +++- fs/cifs/inode.c | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f0b5b5f3dd2e..fb69c1fa85c9 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -260,7 +260,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, buf, inode->i_sb, xid, &fileHandle); if (newinode) { - newinode->i_mode = mode; + if (cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_DYNPERM) + newinode->i_mode = mode; if ((oplock & CIFS_CREATE_ACTION) && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index ae5bcaf2031c..12667d6bf305 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1015,8 +1015,11 @@ mkdir_get_info: CIFS_MOUNT_MAP_SPECIAL_CHR); } if (direntry->d_inode) { - direntry->d_inode->i_mode = mode; - direntry->d_inode->i_mode |= S_IFDIR; + if (cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_DYNPERM) + direntry->d_inode->i_mode = + (mode | S_IFDIR); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { direntry->d_inode->i_uid = -- cgit v1.2.3 From 4e94a105ed0df78e25b20ff8ed6761f5937662b1 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 23 May 2008 18:22:46 +0000 Subject: [CIFS] remove trailing whitespace Signed-off-by: Steve French --- fs/cifs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 12667d6bf305..ae6b725b3665 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1019,7 +1019,7 @@ mkdir_get_info: CIFS_MOUNT_DYNPERM) direntry->d_inode->i_mode = (mode | S_IFDIR); - + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { direntry->d_inode->i_uid = -- cgit v1.2.3 From 4ca691a892e8ab4f79583de1394f17a7dcfa2b57 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 22 May 2008 09:33:34 -0400 Subject: silently ignore ownership changes unless unix extensions are enabled or we're faking uid changes CIFS currently allows you to change the ownership of a file, but unless unix extensions are enabled this change is not passed off to the server. Have CIFS silently ignore ownership changes that can't be persistently stored on the server unless the "setuids" option is explicitly specified. We could return an error here (-EOPNOTSUPP or something), but this is how most disk-based windows filesystems on behave on Linux (e.g. VFAT, NTFS, etc). With cifsacl support and proper Windows to Unix idmapping support, we may be able to do this more properly in the future. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/inode.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index ae6b725b3665..fe752fdb26c3 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1546,13 +1546,26 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) } else goto cifs_setattr_exit; } - if (attrs->ia_valid & ATTR_UID) { - cFYI(1, ("UID changed to %d", attrs->ia_uid)); - uid = attrs->ia_uid; - } - if (attrs->ia_valid & ATTR_GID) { - cFYI(1, ("GID changed to %d", attrs->ia_gid)); - gid = attrs->ia_gid; + + /* + * Without unix extensions we can't send ownership changes to the + * server, so silently ignore them. This is consistent with how + * local DOS/Windows filesystems behave (VFAT, NTFS, etc). With + * CIFSACL support + proper Windows to Unix idmapping, we may be + * able to support this in the future. + */ + if (!pTcon->unix_ext && + !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { + attrs->ia_valid &= ~(ATTR_UID | ATTR_GID); + } else { + if (attrs->ia_valid & ATTR_UID) { + cFYI(1, ("UID changed to %d", attrs->ia_uid)); + uid = attrs->ia_uid; + } + if (attrs->ia_valid & ATTR_GID) { + cFYI(1, ("GID changed to %d", attrs->ia_gid)); + gid = attrs->ia_gid; + } } time_buf.Attributes = 0; -- cgit v1.2.3 From 27adb44c4f671d15932eb0702a09d27244a8a7c1 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 23 May 2008 19:43:29 +0000 Subject: [CIFS] warn if both dynperm and cifsacl mount options specified Signed-off-by: Steve French --- fs/cifs/connect.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 023434f72c15..d49e274f8eba 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2120,6 +2120,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; } + if ((volume_info.cifs_acl) && (volume_info.dynperm)) + cERROR(1, ("mount option dynperm ignored if cifsacl " + "mount option supported")); + tcon = find_unc(sin_server.sin_addr.s_addr, volume_info.UNC, volume_info.username); -- cgit v1.2.3 From b7206153f61bb63ee2cffa63905b57ec01d20e6e Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 23 May 2008 20:35:07 +0000 Subject: [CIFS] Correct incorrect obscure open flag Also add defines for pipe subcommand codes Signed-off-by: Steve French --- fs/cifs/cifspdu.h | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 65d58b4e6a61..0f327c224da3 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -79,6 +79,19 @@ #define TRANS2_GET_DFS_REFERRAL 0x10 #define TRANS2_REPORT_DFS_INCOSISTENCY 0x11 +/* SMB Transact (Named Pipe) subcommand codes */ +#define TRANS_SET_NMPIPE_STATE 0x0001 +#define TRANS_RAW_READ_NMPIPE 0x0011 +#define TRANS_QUERY_NMPIPE_STATE 0x0021 +#define TRANS_QUERY_NMPIPE_INFO 0x0022 +#define TRANS_PEEK_NMPIPE 0x0023 +#define TRANS_TRANSACT_NMPIPE 0x0026 +#define TRANS_RAW_WRITE_NMPIPE 0x0031 +#define TRANS_READ_NMPIPE 0x0036 +#define TRANS_WRITE_NMPIPE 0x0037 +#define TRANS_WAIT_NMPIPE 0x0053 +#define TRANS_CALL_NMPIPE 0x0054 + /* NT Transact subcommand codes */ #define NT_TRANSACT_CREATE 0x01 #define NT_TRANSACT_IOCTL 0x02 @@ -328,12 +341,13 @@ #define CREATE_COMPLETE_IF_OPLK 0x00000100 /* should be zero */ #define CREATE_NO_EA_KNOWLEDGE 0x00000200 #define CREATE_EIGHT_DOT_THREE 0x00000400 /* doc says this is obsolete - open for recovery flag - should - be zero */ + "open for recovery" flag - should + be zero in any case */ +#define CREATE_OPEN_FOR_RECOVERY 0x00000400 #define CREATE_RANDOM_ACCESS 0x00000800 #define CREATE_DELETE_ON_CLOSE 0x00001000 #define CREATE_OPEN_BY_ID 0x00002000 -#define CREATE_OPEN_BACKUP_INTN 0x00004000 +#define CREATE_OPEN_BACKUP_INTENT 0x00004000 #define CREATE_NO_COMPRESSION 0x00008000 #define CREATE_RESERVE_OPFILTER 0x00100000 /* should be zero */ #define OPEN_REPARSE_POINT 0x00200000 @@ -722,7 +736,6 @@ typedef struct smb_com_tconx_rsp_ext { #define SMB_CSC_CACHE_AUTO_REINT 0x0004 #define SMB_CSC_CACHE_VDO 0x0008 #define SMB_CSC_NO_CACHING 0x000C - #define SMB_UNIQUE_FILE_NAME 0x0010 #define SMB_EXTENDED_SIGNATURES 0x0020 @@ -806,7 +819,7 @@ typedef struct smb_com_findclose_req { #define ICOUNT_MASK 0x00FF #define PIPE_READ_MODE 0x0100 #define NAMED_PIPE_TYPE 0x0400 -#define PIPE_END_POINT 0x0800 +#define PIPE_END_POINT 0x4000 #define BLOCKING_NAMED_PIPE 0x8000 typedef struct smb_com_open_req { /* also handles create */ -- cgit v1.2.3 From 03fb0bce01490c9bdedad861962c76f987531014 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 23 May 2008 13:04:19 -0700 Subject: fuse: fix bdi naming conflict Fuse allocates a separate bdi for each filesystem, and registers them in sysfs with "MAJOR:MINOR" of sb->s_dev (st_dev). This works fine for anon devices normally used by fuse, but can conflict with an already registered BDI for "fuseblk" filesystems, where sb->s_dev represents a real block device. In particularl this happens if a non-partitioned device is being mounted. Fix by registering with a different name for "fuseblk" filesystems. Thanks to Ioan Ionita for the bug report. Signed-off-by: Miklos Szeredi Reported-by: Ioan Ionita Tested-by: Ioan Ionita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index fb77e0962132..43e99513334a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -488,7 +488,12 @@ static struct fuse_conn *new_conn(struct super_block *sb) err = bdi_init(&fc->bdi); if (err) goto error_kfree; - err = bdi_register_dev(&fc->bdi, fc->dev); + if (sb->s_bdev) { + err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", + MAJOR(fc->dev), MINOR(fc->dev)); + } else { + err = bdi_register_dev(&fc->bdi, fc->dev); + } if (err) goto error_bdi_destroy; /* -- cgit v1.2.3 From 71fd5179e8d1d4d503b517e0c5374f7c49540bfc Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 23 May 2008 13:04:20 -0700 Subject: ecryptfs: fix missed mutex_unlock Cc: Michael Halcrow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index cd62d75b2cc0..e2832bc7869a 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1906,9 +1906,9 @@ int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm, goto out; } } - mutex_unlock(&key_tfm_list_mutex); (*tfm) = key_tfm->key_tfm; (*tfm_mutex) = &key_tfm->key_tfm_mutex; out: + mutex_unlock(&key_tfm_list_mutex); return rc; } -- cgit v1.2.3 From 80bfc25f42db6d4715c7688ae2352c5a8038fe7e Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Fri, 23 May 2008 13:04:22 -0700 Subject: ntfs: le*_add_cpu conversion replace all: little_endian_variable = cpu_to_leX(leX_to_cpu(little_endian_variable) + expression_in_cpu_byteorder); with: leX_add_cpu(&little_endian_variable, expression_in_cpu_byteorder); generated with semantic patch Signed-off-by: Marcin Slusarz Acked-by: Anton Altaparmakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ntfs/upcase.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ntfs/upcase.c b/fs/ntfs/upcase.c index 9101807dc81a..e2f72ca98037 100644 --- a/fs/ntfs/upcase.c +++ b/fs/ntfs/upcase.c @@ -77,11 +77,10 @@ ntfschar *generate_default_upcase(void) uc[i] = cpu_to_le16(i); for (r = 0; uc_run_table[r][0]; r++) for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++) - uc[i] = cpu_to_le16(le16_to_cpu(uc[i]) + - uc_run_table[r][2]); + le16_add_cpu(&uc[i], uc_run_table[r][2]); for (r = 0; uc_dup_table[r][0]; r++) for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2) - uc[i + 1] = cpu_to_le16(le16_to_cpu(uc[i + 1]) - 1); + le16_add_cpu(&uc[i + 1], -1); for (r = 0; uc_word_table[r][0]; r++) uc[uc_word_table[r][0]] = cpu_to_le16(uc_word_table[r][1]); return uc; -- cgit v1.2.3 From 80119ef5c8153e0a6cc5edf00c083dc98a9bd348 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 23 May 2008 13:04:31 -0700 Subject: mm: fix atomic_t overflow in vm The atomic_t type is 32bit but a 64bit system can have more than 2^32 pages of virtual address space available. Without this we overflow on ludicrously large mappings Signed-off-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 74a323d2b850..32dc14cd8900 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -139,7 +139,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, #define K(x) ((x) << (PAGE_SHIFT - 10)) si_meminfo(&i); si_swapinfo(&i); - committed = atomic_read(&vm_committed_space); + committed = atomic_long_read(&vm_committed_space); allowed = ((totalram_pages - hugetlb_total_pages()) * sysctl_overcommit_ratio / 100) + total_swap_pages; -- cgit v1.2.3 From c4185a0e019387f5ad6e99009804965531fa1fab Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Fri, 23 May 2008 13:04:47 -0700 Subject: proc: proc_get_inode() should get module only once Any file under /proc/net opened more than once leaked the refcounter on the module it belongs to. The problem is that module_get is called for each file opening while module_put is called only when /proc inode is destroyed. So, lets put module counter if we are dealing with already initialised inode. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=10737 Signed-off-by: Denis V. Lunev Cc: David Miller Cc: Patrick McHardy Acked-by: Pavel Emelyanov Acked-by: Robert Olsson Acked-by: Eric W. Biederman Reported-by: Roland Kletzing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 6f4e8dc97da1..b08d10017911 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -425,7 +425,8 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, } } unlock_new_inode(inode); - } + } else + module_put(de->owner); return inode; out_ino: -- cgit v1.2.3 From 5132861a7a44498ebb18357473f8b8d4cdc70e9f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 22 May 2008 09:33:34 -0400 Subject: disable most mode changes on non-unix/non-cifsacl mounts CIFS currently allows you to change the mode of an inode on a share that doesn't have unix extensions enabled, and isn't using cifsacl. The inode in this case *only* has its mode changed in memory on the client. This is problematic since it can change any time the inode is purged from the cache. This patch makes cifs_setattr silently ignore most mode changes when unix extensions and cifsacl support are not enabled, and when the share is not mounted with the "dynperm" option. The exceptions are: When a mode change would remove all write access to an inode we turn on the ATTR_READONLY bit on the server and remove all write bits from the inode's mode in memory. When a mode change would add a write bit to an inode that previously had them all turned off, it turns off the ATTR_READONLY bit on the server, and resets the mode back to what it would normally be (generally, the file_mode or dir_mode of the share). Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/inode.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index fe752fdb26c3..722be543ceec 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1575,7 +1575,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) attrs->ia_valid &= ~ATTR_MODE; if (attrs->ia_valid & ATTR_MODE) { - cFYI(1, ("Mode changed to 0x%x", attrs->ia_mode)); + cFYI(1, ("Mode changed to 0%o", attrs->ia_mode)); mode = attrs->ia_mode; } @@ -1590,18 +1590,18 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) #ifdef CONFIG_CIFS_EXPERIMENTAL if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) rc = mode_to_acl(inode, full_path, mode); - else if ((mode & S_IWUGO) == 0) { -#else - if ((mode & S_IWUGO) == 0) { + else #endif - /* not writeable */ - if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) { - set_dosattr = true; - time_buf.Attributes = - cpu_to_le32(cifsInode->cifsAttrs | - ATTR_READONLY); - } - } else if (cifsInode->cifsAttrs & ATTR_READONLY) { + if (((mode & S_IWUGO) == 0) && + (cifsInode->cifsAttrs & ATTR_READONLY) == 0) { + set_dosattr = true; + time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs | + ATTR_READONLY); + /* fix up mode if we're not using dynperm */ + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) + attrs->ia_mode = inode->i_mode & ~S_IWUGO; + } else if ((mode & S_IWUGO) && + (cifsInode->cifsAttrs & ATTR_READONLY)) { /* If file is readonly on server, we would not be able to write to it - so if any write bit is enabled for user or group or other we @@ -1612,6 +1612,20 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) /* Windows ignores set to zero */ if (time_buf.Attributes == 0) time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL); + + /* reset local inode permissions to normal */ + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { + attrs->ia_mode &= ~(S_IALLUGO); + if (S_ISDIR(inode->i_mode)) + attrs->ia_mode |= + cifs_sb->mnt_dir_mode; + else + attrs->ia_mode |= + cifs_sb->mnt_file_mode; + } + } else if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { + /* ignore mode change - ATTR_READONLY hasn't changed */ + attrs->ia_valid &= ~ATTR_MODE; } } -- cgit v1.2.3 From 03cddb80ed2dacaf03c370d38bcc75f8303a03b8 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 5 Jun 2008 20:59:29 -0400 Subject: ext4: Fix use of uninitialized data with debug enabled. Fix use of uninitialized data with debug enabled. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 873ad9b3418c..c9900aade150 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2745,8 +2745,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, sbi = EXT4_SB(sb); es = sbi->s_es; - ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group, - gdp->bg_free_blocks_count); err = -EIO; bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group); @@ -2762,6 +2760,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (!gdp) goto out_err; + ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group, + gdp->bg_free_blocks_count); + err = ext4_journal_get_write_access(handle, gdp_bh); if (err) goto out_err; @@ -3094,8 +3095,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa) { - unsigned len = ac->ac_o_ex.fe_len; - + unsigned int len = ac->ac_o_ex.fe_len; ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart, &ac->ac_b_ex.fe_group, &ac->ac_b_ex.fe_start); -- cgit v1.2.3 From 0bf7e8379ce7e0159a2a6bd3d937f2f6ada79799 Mon Sep 17 00:00:00 2001 From: "Jose R. Santos" Date: Tue, 3 Jun 2008 14:07:29 -0400 Subject: ext4: Fix uninit block group initialization with FLEX_BG With FLEX_BG block bitmaps, inode bitmaps and inode tables _MAY_ be allocated outside the group. So, when initializing an uninitialized block bitmap, we need to check the location of this blocks before setting the corresponding bits in the block bitmap of the newly initialized group. Also return the right number of free blocks when counting the available free blocks in uninit group. Tested-by: Aneesh Kumar K.V Signed-off-by: Jose R. Santos Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 30494c5da843..9cc80b9cc8d8 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -43,6 +43,46 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, } +static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block, + ext4_group_t block_group) +{ + ext4_group_t actual_group; + ext4_get_group_no_and_offset(sb, block, &actual_group, 0); + if (actual_group == block_group) + return 1; + return 0; +} + +static int ext4_group_used_meta_blocks(struct super_block *sb, + ext4_group_t block_group) +{ + ext4_fsblk_t tmp; + struct ext4_sb_info *sbi = EXT4_SB(sb); + /* block bitmap, inode bitmap, and inode table blocks */ + int used_blocks = sbi->s_itb_per_group + 2; + + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { + struct ext4_group_desc *gdp; + struct buffer_head *bh; + + gdp = ext4_get_group_desc(sb, block_group, &bh); + if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), + block_group)) + used_blocks--; + + if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), + block_group)) + used_blocks--; + + tmp = ext4_inode_table(sb, gdp); + for (; tmp < ext4_inode_table(sb, gdp) + + sbi->s_itb_per_group; tmp++) { + if (!ext4_block_in_group(sb, tmp, block_group)) + used_blocks -= 1; + } + } + return used_blocks; +} /* Initializes an uninitialized block bitmap if given, and returns the * number of blocks free in the group. */ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, @@ -105,20 +145,34 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, free_blocks = group_blocks - bit_max; if (bh) { - ext4_fsblk_t start; + ext4_fsblk_t start, tmp; + int flex_bg = 0; for (bit = 0; bit < bit_max; bit++) ext4_set_bit(bit, bh->b_data); start = ext4_group_first_block_no(sb, block_group); - /* Set bits for block and inode bitmaps, and inode table */ - ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); - ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data); - for (bit = (ext4_inode_table(sb, gdp) - start), - bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++) - ext4_set_bit(bit, bh->b_data); + if (EXT4_HAS_INCOMPAT_FEATURE(sb, + EXT4_FEATURE_INCOMPAT_FLEX_BG)) + flex_bg = 1; + /* Set bits for block and inode bitmaps, and inode table */ + tmp = ext4_block_bitmap(sb, gdp); + if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + ext4_set_bit(tmp - start, bh->b_data); + + tmp = ext4_inode_bitmap(sb, gdp); + if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + ext4_set_bit(tmp - start, bh->b_data); + + tmp = ext4_inode_table(sb, gdp); + for (; tmp < ext4_inode_table(sb, gdp) + + sbi->s_itb_per_group; tmp++) { + if (!flex_bg || + ext4_block_in_group(sb, tmp, block_group)) + ext4_set_bit(tmp - start, bh->b_data); + } /* * Also if the number of blocks within the group is * less than the blocksize * 8 ( which is the size @@ -126,8 +180,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, */ mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); } - - return free_blocks - sbi->s_itb_per_group - 2; + return free_blocks - ext4_group_used_meta_blocks(sb, block_group); } -- cgit v1.2.3 From 944600930a37aa725ba6f93c3244e2d77a1e3581 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 6 Jun 2008 18:05:52 -0400 Subject: ext4: fix online resize bug There is a bug when we are trying to verify that the reserve inode's double indirect blocks point back to the primary gdt blocks. The fix is obvious, we need to mod the gdb count by the addr's per block. This was verified using the same testcase as with the ext3 equivalent of this patch. Signed-off-by: Josef Bacik Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9f086a6a472b..9ecb92f68543 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -563,7 +563,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, } blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count; - data = (__le32 *)dind->b_data + EXT4_SB(sb)->s_gdb_count; + data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count % + EXT4_ADDR_PER_BLOCK(sb)); end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb); /* Get each reserved primary GDT block and verify it holds backups */ -- cgit v1.2.3 From 8ea76900be3b4522396e2021260d2818a27b3a5b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 26 May 2008 10:28:09 -0400 Subject: jbd2: Fix memory leak when verifying checksums in the journal Cc: Andreas Dilger Cc: Girish Shilamkar Signed-off-by: "Theodore Ts'o" --- fs/jbd2/recovery.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 5d0405a9e7ca..7199db52b2fd 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -344,6 +344,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh, *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, obh->b_size); } + put_bh(obh); } return 0; } -- cgit v1.2.3 From 624080eded68738daee041ad64672a9d2614754f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 6 Jun 2008 17:50:40 -0400 Subject: jbd2: If a journal checksum error is detected, propagate the error to ext4 If a journal checksum error is detected, the ext4 filesystem will call ext4_error(), and the mount will either continue, become a read-only mount, or cause a kernel panic based on the superblock flags indicating the user's preference of what to do in case of filesystem corruption being detected. Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 23 +++++++++++++++++++++++ fs/jbd2/recovery.c | 11 ++++------- 2 files changed, 27 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 09d9359c8055..d01a32e8b50a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2189,6 +2189,29 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { if (ext4_load_journal(sb, es, journal_devnum)) goto failed_mount3; + if (!(sb->s_flags & MS_RDONLY) && + EXT4_SB(sb)->s_journal->j_failed_commit) { + printk(KERN_CRIT "EXT4-fs error (device %s): " + "ext4_fill_super: Journal transaction " + "%u is corrupt\n", sb->s_id, + EXT4_SB(sb)->s_journal->j_failed_commit); + if (test_opt (sb, ERRORS_RO)) { + printk (KERN_CRIT + "Mounting filesystem read-only\n"); + sb->s_flags |= MS_RDONLY; + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + } + if (test_opt(sb, ERRORS_PANIC)) { + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + ext4_commit_super(sb, es, 1); + printk(KERN_CRIT + "EXT4-fs (device %s): mount failed\n", + sb->s_id); + goto failed_mount4; + } + } } else if (journal_inum) { if (ext4_create_journal(sb, es, journal_inum)) goto failed_mount3; diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 7199db52b2fd..058f50f65b76 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -611,9 +611,8 @@ static int do_one_pass(journal_t *journal, chksum_err = chksum_seen = 0; if (info->end_transaction) { - printk(KERN_ERR "JBD: Transaction %u " - "found to be corrupt.\n", - next_commit_ID - 1); + journal->j_failed_commit = + info->end_transaction; brelse(bh); break; } @@ -644,10 +643,8 @@ static int do_one_pass(journal_t *journal, if (!JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ - printk(KERN_ERR - "JBD: Transaction %u " - "found to be corrupt.\n", - next_commit_ID); + journal->j_failed_commit = + next_commit_ID; brelse(bh); break; } -- cgit v1.2.3 From cd0b6a39a1d68b61b1073662f40f747c8b728f98 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 26 May 2008 10:28:28 -0400 Subject: ext4: Display the journal_async_commit mount option in /proc/mounts Cc: Andreas Dilger Cc: Girish Shilamkar Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d01a32e8b50a..ba92c606ad9a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -731,6 +731,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) } if (test_opt(sb, BARRIER)) seq_puts(seq, ",barrier=1"); + if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) + seq_puts(seq, ",journal_async_commit"); if (test_opt(sb, NOBH)) seq_puts(seq, ",nobh"); if (!test_opt(sb, EXTENTS)) -- cgit v1.2.3 From 034772b068be62a79470d6c1b81b01fbe27793ac Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 3 Jun 2008 22:31:11 -0400 Subject: jbd2: Fix barrier fallback code to re-lock the buffer head If the device doesn't support write barriers, the write is retried without ordered mode. But the buffer head needs to be re-locked or submit_bh will fail with on BUG(!buffer_locked(bh)). Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 4d99685fdce4..a2ed72f7ceee 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -168,6 +168,7 @@ static int journal_submit_commit_record(journal_t *journal, spin_unlock(&journal->j_state_lock); /* And try again, without the barrier */ + lock_buffer(bh); set_buffer_uptodate(bh); set_buffer_dirty(bh); ret = submit_bh(WRITE, bh); -- cgit v1.2.3 From 571640cad3fda6475da45d91cf86076f1f86bd9b Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 26 May 2008 12:29:46 -0400 Subject: ext4: enable barriers by default I can't think of any valid reason for ext4 to not use barriers when they are available; I believe this is necessary for filesystem integrity in the face of a volatile write cache on storage. An administrator who trusts that the cache is sufficiently battery- backed (and power supplies are sufficiently redundant, etc...) can always turn it back off again. SuSE has carried such a patch for ext3 for quite some time now. Also document the mount option while we're at it. Signed-off-by: Eric Sandeen Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ba92c606ad9a..cb96f127c366 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -671,6 +671,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) unsigned long def_mount_opts; struct super_block *sb = vfs->mnt_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); + journal_t *journal = sbi->s_journal; struct ext4_super_block *es = sbi->s_es; def_mount_opts = le32_to_cpu(es->s_default_mount_opts); @@ -729,8 +730,13 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_printf(seq, ",commit=%u", (unsigned) (sbi->s_commit_interval / HZ)); } - if (test_opt(sb, BARRIER)) - seq_puts(seq, ",barrier=1"); + /* + * We're changing the default of barrier mount option, so + * let's always display its mount state so it's clear what its + * status is. + */ + seq_puts(seq, ",barrier="); + seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) seq_puts(seq, ",journal_async_commit"); if (test_opt(sb, NOBH)) @@ -1909,6 +1915,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sbi->s_resgid = le16_to_cpu(es->s_def_resgid); set_opt(sbi->s_mount_opt, RESERVATION); + set_opt(sbi->s_mount_opt, BARRIER); /* * turn on extents feature by default in ext4 filesystem -- cgit v1.2.3 From cbaffba12ce08beb3e80bfda148ee0fa14aac188 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 26 May 2008 20:55:42 +0400 Subject: posix timers: discard SI_TIMER signals on exec Based on Roland's patch. This approach was suggested by Austin Clements from the very beginning, and then by Linus. As Austin pointed out, the execing task can be killed by SI_TIMER signal because exec flushes the signal handlers, but doesn't discard the pending signals generated by posix timers. Perhaps not a bug, but people find this surprising. See http://bugzilla.kernel.org/show_bug.cgi?id=10460 Signed-off-by: Oleg Nesterov Cc: Austin Clements Cc: Roland McGrath Signed-off-by: Linus Torvalds --- fs/exec.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 3c2ba7ce11d4..9448f1b50b4a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -860,6 +860,7 @@ static int de_thread(struct task_struct *tsk) no_thread_group: exit_itimers(sig); + flush_itimer_signals(); if (leader) release_task(leader); -- cgit v1.2.3 From a82c53a0e3f57f02782330372b7adad67b417645 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 9 May 2008 13:28:36 +0200 Subject: splice: fix sendfile() issue with relay Splice isn't always incrementing the ppos correctly, which broke relay splice. Signed-off-by: Tom Zanussi Tested-by: Dan Williams Signed-off-by: Jens Axboe --- fs/splice.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index 78150038b584..a048ad2130c3 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -983,7 +983,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, while (len) { size_t read_len; - loff_t pos = sd->pos; + loff_t pos = sd->pos, prev_pos = pos; ret = do_splice_to(in, &pos, pipe, len, flags); if (unlikely(ret <= 0)) @@ -998,15 +998,19 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, * could get stuck data in the internal pipe: */ ret = actor(pipe, sd); - if (unlikely(ret <= 0)) + if (unlikely(ret <= 0)) { + sd->pos = prev_pos; goto out_release; + } bytes += ret; len -= ret; sd->pos = pos; - if (ret < read_len) + if (ret < read_len) { + sd->pos = prev_pos + ret; goto out_release; + } } done: @@ -1072,7 +1076,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, ret = splice_direct_to_actor(in, &sd, direct_splice_actor); if (ret > 0) - *ppos += ret; + *ppos = sd.pos; return ret; } -- cgit v1.2.3 From ca39d651d17df49b6d11f851d56c0ce0ce01ea1a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 20 May 2008 21:27:41 +0200 Subject: splice: handle try_to_release_page() failure splice currently assumes that try_to_release_page() always suceeds, but it can return failure. If it does, we cannot steal the page. Acked-by: Mingming Cao --- fs/splice.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index a048ad2130c3..aa5f6f60b305 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -58,8 +58,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, */ wait_on_page_writeback(page); - if (PagePrivate(page)) - try_to_release_page(page, GFP_KERNEL); + if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) + goto out_unlock; /* * If we succeeded in removing the mapping, set LRU flag @@ -75,6 +75,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, * Raced with truncate or failed to remove page from current * address space, unlock and return failure. */ +out_unlock: unlock_page(page); return 1; } -- cgit v1.2.3 From a12630b186d56a77d17c9b34c82b88dda4337ed7 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 9 May 2008 18:49:29 -0700 Subject: ocfs2: Rename 'user_stack' plugin structure to 'ocfs2_user_plugin' The static structure describing the userspace cluster plugin for ocfs2 was named 'user_stack', which is a real pain when people are grep(1)ing the tree for the program stack object 'user_stack'. Change the name to something distinct and namespaced. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stack_user.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index b503772cd0ec..6b97d11f6bf8 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -61,7 +61,7 @@ * negotiated by the client. The client negotiates based on the maximum * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major * number from the "SETV" message must match - * user_stack.sp_proto->lp_max_version.pv_major, and the minor number + * ocfs2_user_plugin.sp_proto->lp_max_version.pv_major, and the minor number * must be less than or equal to ...->lp_max_version.pv_minor. * * Once this information has been set, mounts will be allowed. From this @@ -153,7 +153,7 @@ union ocfs2_control_message { struct ocfs2_control_message_down u_down; }; -static struct ocfs2_stack_plugin user_stack; +static struct ocfs2_stack_plugin ocfs2_user_plugin; static atomic_t ocfs2_control_opened; static int ocfs2_control_this_node = -1; @@ -399,7 +399,7 @@ static int ocfs2_control_do_setversion_msg(struct file *file, char *ptr = NULL; struct ocfs2_control_private *p = file->private_data; struct ocfs2_protocol_version *max = - &user_stack.sp_proto->lp_max_version; + &ocfs2_user_plugin.sp_proto->lp_max_version; if (ocfs2_control_get_handshake_state(file) != OCFS2_CONTROL_HANDSHAKE_PROTOCOL) @@ -680,7 +680,7 @@ static void fsdlm_lock_ast_wrapper(void *astarg) struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg); int status = lksb->sb_status; - BUG_ON(user_stack.sp_proto == NULL); + BUG_ON(ocfs2_user_plugin.sp_proto == NULL); /* * For now we're punting on the issue of other non-standard errors @@ -693,16 +693,16 @@ static void fsdlm_lock_ast_wrapper(void *astarg) */ if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL) - user_stack.sp_proto->lp_unlock_ast(astarg, 0); + ocfs2_user_plugin.sp_proto->lp_unlock_ast(astarg, 0); else - user_stack.sp_proto->lp_lock_ast(astarg); + ocfs2_user_plugin.sp_proto->lp_lock_ast(astarg); } static void fsdlm_blocking_ast_wrapper(void *astarg, int level) { - BUG_ON(user_stack.sp_proto == NULL); + BUG_ON(ocfs2_user_plugin.sp_proto == NULL); - user_stack.sp_proto->lp_blocking_ast(astarg, level); + ocfs2_user_plugin.sp_proto->lp_blocking_ast(astarg, level); } static int user_dlm_lock(struct ocfs2_cluster_connection *conn, @@ -838,7 +838,7 @@ static int user_cluster_this_node(unsigned int *this_node) return 0; } -static struct ocfs2_stack_operations user_stack_ops = { +static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { .connect = user_cluster_connect, .disconnect = user_cluster_disconnect, .this_node = user_cluster_this_node, @@ -849,20 +849,20 @@ static struct ocfs2_stack_operations user_stack_ops = { .dump_lksb = user_dlm_dump_lksb, }; -static struct ocfs2_stack_plugin user_stack = { +static struct ocfs2_stack_plugin ocfs2_user_plugin = { .sp_name = "user", - .sp_ops = &user_stack_ops, + .sp_ops = &ocfs2_user_plugin_ops, .sp_owner = THIS_MODULE, }; -static int __init user_stack_init(void) +static int __init ocfs2_user_plugin_init(void) { int rc; rc = ocfs2_control_init(); if (!rc) { - rc = ocfs2_stack_glue_register(&user_stack); + rc = ocfs2_stack_glue_register(&ocfs2_user_plugin); if (rc) ocfs2_control_exit(); } @@ -870,14 +870,14 @@ static int __init user_stack_init(void) return rc; } -static void __exit user_stack_exit(void) +static void __exit ocfs2_user_plugin_exit(void) { - ocfs2_stack_glue_unregister(&user_stack); + ocfs2_stack_glue_unregister(&ocfs2_user_plugin); ocfs2_control_exit(); } MODULE_AUTHOR("Oracle"); MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks"); MODULE_LICENSE("GPL"); -module_init(user_stack_init); -module_exit(user_stack_exit); +module_init(ocfs2_user_plugin_init); +module_exit(ocfs2_user_plugin_exit); -- cgit v1.2.3 From 271d772d02507c7541d5e6b4938ed2380e59a39a Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 12 May 2008 18:31:35 -0700 Subject: [PATCH 1/3] ocfs2/net: Silence build warnings This patch silences the build warnings concerning o2net_debugfs_init() and friends when building without CONFIG_DEBUG_FS enabled. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/tcp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index a705d5d19036..fd6179eb26d4 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h @@ -128,23 +128,23 @@ void o2net_debug_del_nst(struct o2net_send_tracking *nst); void o2net_debug_add_sc(struct o2net_sock_container *sc); void o2net_debug_del_sc(struct o2net_sock_container *sc); #else -static int o2net_debugfs_init(void) +static inline int o2net_debugfs_init(void) { return 0; } -static void o2net_debugfs_exit(void) +static inline void o2net_debugfs_exit(void) { } -static void o2net_debug_add_nst(struct o2net_send_tracking *nst) +static inline void o2net_debug_add_nst(struct o2net_send_tracking *nst) { } -static void o2net_debug_del_nst(struct o2net_send_tracking *nst) +static inline void o2net_debug_del_nst(struct o2net_send_tracking *nst) { } -static void o2net_debug_add_sc(struct o2net_sock_container *sc) +static inline void o2net_debug_add_sc(struct o2net_sock_container *sc) { } -static void o2net_debug_del_sc(struct o2net_sock_container *sc) +static inline void o2net_debug_del_sc(struct o2net_sock_container *sc) { } #endif /* CONFIG_DEBUG_FS */ -- cgit v1.2.3 From 959040c37a8cae8117907d4aed87f1b01ff1ea19 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 12 May 2008 18:31:36 -0700 Subject: [PATCH 2/3] ocfs2/dlm: Silence build warnings This patch silences the build warnings concerning dlm_debug_init() and friends when building without CONFIG_DEBUG_FS enabled. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmdebug.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h index d34a62a3a625..8c686d22f9c7 100644 --- a/fs/ocfs2/dlm/dlmdebug.h +++ b/fs/ocfs2/dlm/dlmdebug.h @@ -60,25 +60,25 @@ void dlm_destroy_debugfs_root(void); #else -static int dlm_debug_init(struct dlm_ctxt *dlm) +static inline int dlm_debug_init(struct dlm_ctxt *dlm) { return 0; } -static void dlm_debug_shutdown(struct dlm_ctxt *dlm) +static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm) { } -static int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) +static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) { return 0; } -static void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) +static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) { } -static int dlm_create_debugfs_root(void) +static inline int dlm_create_debugfs_root(void) { return 0; } -static void dlm_destroy_debugfs_root(void) +static inline void dlm_destroy_debugfs_root(void) { } -- cgit v1.2.3 From 0f475b2abed6cbccee1da20a0bef2895eb2a0edd Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 12 May 2008 18:31:37 -0700 Subject: [PATCH 3/3] ocfs2/net: Silence build warnings This patch silences the build warnings concerning o2net_init_nst() and friends when building without CONFIG_DEBUG_FS enabled. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/tcp.c | 28 +++++++++------------------- fs/ocfs2/cluster/tcp_internal.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 1e44ad14881a..a27d61581bd6 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -142,53 +142,43 @@ static void o2net_idle_timer(unsigned long data); static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); -static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, - u32 msgkey, struct task_struct *task, u8 node) -{ #ifdef CONFIG_DEBUG_FS +void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, + u32 msgkey, struct task_struct *task, u8 node) +{ INIT_LIST_HEAD(&nst->st_net_debug_item); nst->st_task = task; nst->st_msg_type = msgtype; nst->st_msg_key = msgkey; nst->st_node = node; -#endif } -static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) +void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) { -#ifdef CONFIG_DEBUG_FS do_gettimeofday(&nst->st_sock_time); -#endif } -static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) +void o2net_set_nst_send_time(struct o2net_send_tracking *nst) { -#ifdef CONFIG_DEBUG_FS do_gettimeofday(&nst->st_send_time); -#endif } -static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) +void o2net_set_nst_status_time(struct o2net_send_tracking *nst) { -#ifdef CONFIG_DEBUG_FS do_gettimeofday(&nst->st_status_time); -#endif } -static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, +void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, struct o2net_sock_container *sc) { -#ifdef CONFIG_DEBUG_FS nst->st_sc = sc; -#endif } -static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) +void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) { -#ifdef CONFIG_DEBUG_FS nst->st_id = msg_id; -#endif } +#endif /* CONFIG_DEBUG_FS */ static inline int o2net_reconnect_delay(void) { diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 8d58cfe410b1..18307ff81b77 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -224,10 +224,42 @@ struct o2net_send_tracking { struct timeval st_send_time; struct timeval st_status_time; }; + +void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, + u32 msgkey, struct task_struct *task, u8 node); +void o2net_set_nst_sock_time(struct o2net_send_tracking *nst); +void o2net_set_nst_send_time(struct o2net_send_tracking *nst); +void o2net_set_nst_status_time(struct o2net_send_tracking *nst); +void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, + struct o2net_sock_container *sc); +void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id); + #else struct o2net_send_tracking { u32 dummy; }; + +static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, + u32 msgkey, struct task_struct *task, u8 node) +{ +} +static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) +{ +} +static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) +{ +} +static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) +{ +} +static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, + struct o2net_sock_container *sc) +{ +} +static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, + u32 msg_id) +{ +} #endif /* CONFIG_DEBUG_FS */ #endif /* O2CLUSTER_TCP_INTERNAL_H */ -- cgit v1.2.3 From ca05a99a54db1db5bca72eccb5866d2a86f8517f Mon Sep 17 00:00:00 2001 From: "Andrew G. Morgan" Date: Tue, 27 May 2008 22:05:17 -0700 Subject: capabilities: remain source compatible with 32-bit raw legacy capability support. Source code out there hard-codes a notion of what the _LINUX_CAPABILITY_VERSION #define means in terms of the semantics of the raw capability system calls capget() and capset(). Its unfortunate, but true. Since the confusing header file has been in a released kernel, there is software that is erroneously using 64-bit capabilities with the semantics of 32-bit compatibilities. These recently compiled programs may suffer corruption of their memory when sys_getcap() overwrites more memory than they are coded to expect, and the raising of added capabilities when using sys_capset(). As such, this patch does a number of things to clean up the situation for all. It 1. forces the _LINUX_CAPABILITY_VERSION define to always retain its legacy value. 2. adopts a new #define strategy for the kernel's internal implementation of the preferred magic. 3. deprecates v2 capability magic in favor of a new (v3) magic number. The functionality of v3 is entirely equivalent to v2, the only difference being that the v2 magic causes the kernel to log a "deprecated" warning so the admin can find applications that may be using v2 inappropriately. [User space code continues to be encouraged to use the libcap API which protects the application from details like this. libcap-2.10 is the first to support v3 capabilities.] Fixes issue reported in https://bugzilla.redhat.com/show_bug.cgi?id=447518. Thanks to Bojan Smojver for the report. [akpm@linux-foundation.org: s/depreciate/deprecate/g] [akpm@linux-foundation.org: be robust about put_user size] [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Andrew G. Morgan Cc: Serge E. Hallyn Cc: Bojan Smojver Cc: stable@kernel.org Signed-off-by: Andrew Morton Signed-off-by: Chris Wright --- fs/proc/array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index 9e3b8c33c24b..797d775e0354 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -288,7 +288,7 @@ static void render_cap_t(struct seq_file *m, const char *header, seq_printf(m, "%s", header); CAP_FOR_EACH_U32(__capi) { seq_printf(m, "%08x", - a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]); + a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); } seq_printf(m, "\n"); } -- cgit v1.2.3 From 1d92cfd54a51ff1b9593019fdde56793b66ba6a9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Jun 2008 10:59:02 +0100 Subject: cifs endianness fixes __le16 fields used as host-endian. Signed-off-by: Al Viro Acked-by: Steve French Signed-off-by: Linus Torvalds --- fs/cifs/cifssmb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 9b8b4cfdf993..fb655b4593c6 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3927,9 +3927,9 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, } ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals); - if (ref->VersionNumber != 3) { + if (ref->VersionNumber != cpu_to_le16(3)) { cERROR(1, ("Referrals of V%d version are not supported," - "should be V3", ref->VersionNumber)); + "should be V3", le16_to_cpu(ref->VersionNumber))); rc = -EINVAL; goto parse_DFS_referrals_exit; } @@ -3977,7 +3977,7 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, if (rc) goto parse_DFS_referrals_exit; - ref += ref->Size; + ref += le16_to_cpu(ref->Size); } parse_DFS_referrals_exit: -- cgit v1.2.3 From ddb2c43594f22843e9f3153da151deaba1a834c5 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Wed, 4 Jun 2008 09:16:33 -0700 Subject: asn1: additional sanity checking during BER decoding - Don't trust a length which is greater than the working buffer. An invalid length could cause overflow when calculating buffer size for decoding oid. - An oid length of zero is invalid and allows for an off-by-one error when decoding oid because the first subid actually encodes first 2 subids. - A primitive encoding may not have an indefinite length. Thanks to Wei Wang from McAfee for report. Cc: Steven French Cc: stable@kernel.org Acked-by: Patrick McHardy Signed-off-by: Chris Wright Signed-off-by: Linus Torvalds --- fs/cifs/asn1.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'fs') diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index cb52cbbe45ff..f58e41d3ba48 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -186,6 +186,11 @@ asn1_length_decode(struct asn1_ctx *ctx, unsigned int *def, unsigned int *len) } } } + + /* don't trust len bigger than ctx buffer */ + if (*len > ctx->end - ctx->pointer) + return 0; + return 1; } @@ -203,6 +208,10 @@ asn1_header_decode(struct asn1_ctx *ctx, if (!asn1_length_decode(ctx, &def, &len)) return 0; + /* primitive shall be definite, indefinite shall be constructed */ + if (*con == ASN1_PRI && !def) + return 0; + if (def) *eoc = ctx->pointer + len; else @@ -389,6 +398,11 @@ asn1_oid_decode(struct asn1_ctx *ctx, unsigned long *optr; size = eoc - ctx->pointer + 1; + + /* first subid actually encodes first two subids */ + if (size < 2 || size > ULONG_MAX/sizeof(unsigned long)) + return 0; + *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); if (*oid == NULL) return 0; -- cgit v1.2.3 From b8c141e8fd80fa64d80c6a74492053f25a28e0ea Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 5 Jun 2008 22:45:55 -0700 Subject: frv: don't offer BINFMT_FLAT Fix the following compile error: CC fs/binfmt_flat.o In file included from /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:36: /home/bunk/linux/kernel-2.6/git/linux-2.6/include/linux/flat.h:14:22: error: asm/flat.h: No such file or directory /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c: In function 'create_flat_tables': /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:124: error: implicit declaration of function 'flat_stack_align' /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:125: error: implicit declaration of function 'flat_argvp_envp_on_stack' /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c: In function 'calc_reloc': /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:347: error: implicit declaration of function 'flat_reloc_valid' /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c: In function 'load_flat_file': /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:479: error: implicit declaration of function 'flat_old_ram_flag' /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:755: error: implicit declaration of function 'flat_set_persistent' /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:757: error: implicit declaration of function 'flat_get_relocate_addr' /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:765: error: implicit declaration of function 'flat_get_addr_from_rp' /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/binfmt_flat.c:781: error: implicit declaration of function 'flat_put_addr_at_rp' Reported-by: Adrian Bunk Signed-off-by: Adrian Bunk Tested-by: David Howells Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig.binfmt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 55e8ee1900a5..3263084eef9e 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -42,7 +42,7 @@ config BINFMT_ELF_FDPIC config BINFMT_FLAT bool "Kernel support for flat binaries" - depends on !MMU + depends on !MMU && (!FRV || BROKEN) help Support uClinux FLAT format binaries. -- cgit v1.2.3 From d3e49afbb66109613c3474f2273f5830ac2dcb09 Mon Sep 17 00:00:00 2001 From: Michael Halcrow Date: Thu, 5 Jun 2008 22:46:02 -0700 Subject: eCryptfs: remove unnecessary page decrypt call The page decrypt calls in ecryptfs_write() are both pointless and buggy. Pointless because ecryptfs_get_locked_page() has already brought the page up to date, and buggy because prior mmap writes will just be blown away by the decrypt call. This patch also removes the declaration of a now-nonexistent function ecryptfs_write_zeros(). Thanks to Eric Sandeen and David Kleikamp for helping to track this down. Eric said: fsx w/ mmap dies quickly ( < 100 ops) without this, and survives nicely (to millions of ops+) with it in place. Signed-off-by: Michael Halcrow Cc: Eric Sandeen Cc: Dave Kleikamp Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/ecryptfs_kernel.h | 2 -- fs/ecryptfs/read_write.c | 22 ---------------------- 2 files changed, 24 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 951ee33a022d..c15c25745e05 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -660,8 +660,6 @@ int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm, int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, struct ecryptfs_auth_tok **auth_tok, char *sig); -int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, - int num_zeros); int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, loff_t offset, size_t size); int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index ebf55150be56..75c2ea9fee35 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -157,20 +157,6 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, ecryptfs_page_idx, rc); goto out; } - if (start_offset_in_page) { - /* Read in the page from the lower - * into the eCryptfs inode page cache, - * decrypting */ - rc = ecryptfs_decrypt_page(ecryptfs_page); - if (rc) { - printk(KERN_ERR "%s: Error decrypting " - "page; rc = [%d]\n", - __func__, rc); - ClearPageUptodate(ecryptfs_page); - page_cache_release(ecryptfs_page); - goto out; - } - } ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); /* @@ -349,14 +335,6 @@ int ecryptfs_read(char *data, loff_t offset, size_t size, ecryptfs_page_idx, rc); goto out; } - rc = ecryptfs_decrypt_page(ecryptfs_page); - if (rc) { - printk(KERN_ERR "%s: Error decrypting " - "page; rc = [%d]\n", __func__, rc); - ClearPageUptodate(ecryptfs_page); - page_cache_release(ecryptfs_page); - goto out; - } ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); memcpy((data + data_offset), ((char *)ecryptfs_page_virt + start_offset_in_page), -- cgit v1.2.3 From 44d1b980c72db0faf35adb082fb2208351803028 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 5 Jun 2008 22:46:18 -0700 Subject: Fix various old email addresses for dwmw2 Although if people have questions about ARCnet, perhaps it's _better_ for them to be mailing dwmw2@cam.ac.uk about it... Signed-off-by: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/afs/callback.c | 2 +- fs/afs/inode.c | 2 +- fs/afs/super.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/afs/callback.c b/fs/afs/callback.c index a78d5b236bb1..587ef5123cd8 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -8,7 +8,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * Authors: David Woodhouse + * Authors: David Woodhouse * David Howells * */ diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 08db82e1343a..bb47217f6a18 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -8,7 +8,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * Authors: David Woodhouse + * Authors: David Woodhouse * David Howells * */ diff --git a/fs/afs/super.c b/fs/afs/super.c index 4b572b801d8d..7e3faeef6818 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -10,7 +10,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * Authors: David Howells - * David Woodhouse + * David Woodhouse * */ -- cgit v1.2.3 From 93b071139a956e51c98cdefd50a47981a4eb852e Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 5 Jun 2008 22:46:21 -0700 Subject: introduce memory_read_from_buffer() This patch introduces memory_read_from_buffer(). The only difference between memory_read_from_buffer() and simple_read_from_buffer() is which address space the function copies to. simple_read_from_buffer copies to user space memory. memory_read_from_buffer copies to normal memory. Signed-off-by: Akinobu Mita Cc: Al Viro Cc: Doug Warzecha Cc: Zhang Rui Cc: Matt Domsch Cc: Abhay Salunke Cc: Greg Kroah-Hartman Cc: Markus Rechberger Cc: Kay Sievers Cc: Bob Moore Cc: Thomas Renninger Cc: Len Brown Cc: Benjamin Herrenschmidt Cc: "Antonino A. Daplas" Cc: Krzysztof Helt Cc: Geert Uytterhoeven Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Peter Oberparleiter Cc: Michael Holzheu Cc: Brian King Cc: James E.J. Bottomley Cc: Andrew Vasquez Cc: Seokmann Ju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/libfs.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'fs') diff --git a/fs/libfs.c b/fs/libfs.c index b004dfadd891..892d41cb3382 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -528,6 +528,23 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, return count; } +ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, + const void *from, size_t available) +{ + loff_t pos = *ppos; + + if (pos < 0) + return -EINVAL; + if (pos >= available) + return 0; + if (count > available - pos) + count = available - pos; + memcpy(to, from + pos, count); + *ppos = pos + count; + + return count; +} + /* * Transaction based IO. * The file expects a single write which triggers the transaction, and then @@ -800,6 +817,7 @@ EXPORT_SYMBOL(simple_statfs); EXPORT_SYMBOL(simple_sync_file); EXPORT_SYMBOL(simple_unlink); EXPORT_SYMBOL(simple_read_from_buffer); +EXPORT_SYMBOL(memory_read_from_buffer); EXPORT_SYMBOL(simple_transaction_get); EXPORT_SYMBOL(simple_transaction_read); EXPORT_SYMBOL(simple_transaction_release); -- cgit v1.2.3 From 7db9cfd380205f6b50afdc3bc3619f876a5eaf0d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 5 Jun 2008 22:46:27 -0700 Subject: devscgroup: check for device permissions at mount time Currently even if a task sits in an all-denied cgroup it can still mount any block device in any mode it wants. Put a proper check in do_open for block device to prevent this. Signed-off-by: Pavel Emelyanov Acked-by: Serge Hallyn Tested-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/block_dev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 7d822fae7765..470c10ceb0fb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -928,9 +929,14 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) { struct module *owner = NULL; struct gendisk *disk; - int ret = -ENXIO; + int ret; int part; + ret = devcgroup_inode_permission(bdev->bd_inode, file->f_mode); + if (ret != 0) + return ret; + + ret = -ENXIO; file->f_mapping = bdev->bd_inode->i_mapping; lock_kernel(); disk = get_gendisk(bdev->bd_dev, &part); -- cgit v1.2.3 From aae8679b0ebcaa92f99c1c3cb0cd651594a43915 Mon Sep 17 00:00:00 2001 From: Thomas Tuttle Date: Thu, 5 Jun 2008 22:46:31 -0700 Subject: pagemap: fix bug in add_to_pagemap, require aligned-length reads of /proc/pid/pagemap Fix a bug in add_to_pagemap. Previously, since pm->out was a char *, put_user was only copying 1 byte of every PFN, resulting in the top 7 bytes of each PFN not being copied. By requiring that reads be a multiple of 8 bytes, I can make pm->out and pm->end u64*s instead of char*s, which makes put_user work properly, and also simplifies the logic in add_to_pagemap a bit. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Thomas Tuttle Cc: Matt Mackall Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 88717c0f941b..17403629e330 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -496,7 +496,7 @@ const struct file_operations proc_clear_refs_operations = { }; struct pagemapread { - char __user *out, *end; + u64 __user *out, *end; }; #define PM_ENTRY_BYTES sizeof(u64) @@ -519,21 +519,11 @@ struct pagemapread { static int add_to_pagemap(unsigned long addr, u64 pfn, struct pagemapread *pm) { - /* - * Make sure there's room in the buffer for an - * entire entry. Otherwise, only copy part of - * the pfn. - */ - if (pm->out + PM_ENTRY_BYTES >= pm->end) { - if (copy_to_user(pm->out, &pfn, pm->end - pm->out)) - return -EFAULT; - pm->out = pm->end; - return PM_END_OF_BUFFER; - } - if (put_user(pfn, pm->out)) return -EFAULT; - pm->out += PM_ENTRY_BYTES; + pm->out++; + if (pm->out >= pm->end) + return PM_END_OF_BUFFER; return 0; } @@ -634,7 +624,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, ret = -EINVAL; /* file position must be aligned */ - if (*ppos % PM_ENTRY_BYTES) + if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES)) goto out_task; ret = 0; @@ -664,8 +654,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, goto out_pages; } - pm.out = buf; - pm.end = buf + count; + pm.out = (u64 *)buf; + pm.end = (u64 *)(buf + count); if (!ptrace_may_attach(task)) { ret = -EIO; @@ -690,9 +680,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (ret == PM_END_OF_BUFFER) ret = 0; /* don't need mmap_sem for these, but this looks cleaner */ - *ppos += pm.out - buf; + *ppos += (char *)pm.out - buf; if (!ret) - ret = pm.out - buf; + ret = (char *)pm.out - buf; } out_pages: -- cgit v1.2.3 From d100d148aa48df3b6ad526a48624f906695efe60 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Jun 2008 22:46:46 -0700 Subject: nommu: fix ksize() abuse The nommu binfmt code uses ksize() for pointers returned from do_mmap() which is wrong. This converts the call-sites to use the nommu specific kobjsize() function which works as expected. Cc: Christoph Lameter Cc: Matt Mackall Acked-by: Paul Mundt Acked-by: David Howells Signed-off-by: Pekka Enberg Acked-by: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf_fdpic.c | 2 +- fs/binfmt_flat.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index ddd35d873391..d051a32e6270 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -390,7 +390,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, } /* expand the stack mapping to use up the entire allocation granule */ - fullsize = ksize((char *) current->mm->start_brk); + fullsize = kobjsize((char *) current->mm->start_brk); if (!IS_ERR_VALUE(do_mremap(current->mm->start_brk, stack_size, fullsize, 0, 0))) stack_size = fullsize; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 3b40d45a3a16..2cb1acda3a82 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -548,7 +548,7 @@ static int load_flat_file(struct linux_binprm * bprm, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); /* Remap to use all availabe slack region space */ if (realdatastart && (realdatastart < (unsigned long)-4096)) { - reallen = ksize((void *)realdatastart); + reallen = kobjsize((void *)realdatastart); if (reallen > len) { realdatastart = do_mremap(realdatastart, len, reallen, MREMAP_FIXED, realdatastart); @@ -600,7 +600,7 @@ static int load_flat_file(struct linux_binprm * bprm, PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); /* Remap to use all availabe slack region space */ if (textpos && (textpos < (unsigned long) -4096)) { - reallen = ksize((void *)textpos); + reallen = kobjsize((void *)textpos); if (reallen > len) { textpos = do_mremap(textpos, len, reallen, MREMAP_FIXED, textpos); @@ -683,7 +683,7 @@ static int load_flat_file(struct linux_binprm * bprm, */ current->mm->start_brk = datapos + data_len + bss_len; current->mm->brk = (current->mm->start_brk + 3) & ~3; - current->mm->context.end_brk = memp + ksize((void *) memp) - stack_len; + current->mm->context.end_brk = memp + kobjsize((void *) memp) - stack_len; } if (flags & FLAT_FLAG_KTRACE) @@ -790,7 +790,7 @@ static int load_flat_file(struct linux_binprm * bprm, /* zero the BSS, BRK and stack areas */ memset((void*)(datapos + data_len), 0, bss_len + - (memp + ksize((void *) memp) - stack_len - /* end brk */ + (memp + kobjsize((void *) memp) - stack_len - /* end brk */ libinfo->lib_list[id].start_brk) + /* start brk */ stack_len); -- cgit v1.2.3 From 9bb91784de6618c955994b2d5be332fb68c87ef1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 5 Jun 2008 22:46:47 -0700 Subject: ext3: fix online resize bug There is a bug when we are trying to verify that the reserve inode's double indirect blocks point back to the primary gdt blocks. The fix is obvious, we need to mod the gdb count by the addr's per block. You can verify this with the following test case dd if=/dev/zero of=disk1 seek=1024 count=1 bs=100M losetup /dev/loop1 disk1 pvcreate /dev/loop1 vgcreate loopvg1 /dev/loop1 lvcreate -l 100%VG loopvg1 -n looplv1 mkfs.ext3 -J size=64 -b 1024 /dev/loopvg1/looplv1 mount /dev/loopvg1/looplv1 /mnt/loop dd if=/dev/zero of=disk2 seek=1024 count=1 bs=50M losetup /dev/loop2 disk2 pvcreate /dev/loop2 vgextend loopvg1 /dev/loop2 lvextend -l 100%VG /dev/loopvg1/looplv1 resize2fs /dev/loopvg1/looplv1 without this patch the resize2fs fails, with it the resize2fs succeeds. Signed-off-by: Josef Bacik Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 28cfd0b40527..77278e947e94 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -580,7 +580,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, } blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count; - data = (__le32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count; + data = (__le32 *)dind->b_data + (EXT3_SB(sb)->s_gdb_count % + EXT3_ADDR_PER_BLOCK(sb)); end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb); /* Get each reserved primary GDT block and verify it holds backups */ -- cgit v1.2.3 From aed5417593ad125283f35513573282139a8664b5 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 5 Jun 2008 22:46:53 -0700 Subject: proc: calculate the correct /proc/ link count This patch: commit e9720acd728a46cb40daa52c99a979f7c4ff195c Author: Pavel Emelyanov Date: Fri Mar 7 11:08:40 2008 -0800 [NET]: Make /proc/net a symlink on /proc/self/net (v3) introduced a /proc/self/net directory without bumping the corresponding link count for /proc/self. This patch replaces the static link count initializations with a call that counts the number of directory entries in the given pid_entry table whenever it is instantiated, and thus relieves the burden of manually keeping the two in sync. [akpm@linux-foundation.org: cleanup] Acked-by: Eric W. Biederman Cc: Pavel Emelyanov Signed-off-by: Vegard Nossum Cc: "David S. Miller" Cc: Alexey Dobriyan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index c447e0743a3c..3b455371e7ff 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -127,6 +127,25 @@ struct pid_entry { NULL, &proc_single_file_operations, \ { .proc_show = &proc_##OTYPE } ) +/* + * Count the number of hardlinks for the pid_entry table, excluding the . + * and .. links. + */ +static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, + unsigned int n) +{ + unsigned int i; + unsigned int count; + + count = 0; + for (i = 0; i < n; ++i) { + if (S_ISDIR(entries[i].mode)) + ++count; + } + + return count; +} + int maps_protect; EXPORT_SYMBOL(maps_protect); @@ -2585,10 +2604,9 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; - inode->i_nlink = 5; -#ifdef CONFIG_SECURITY - inode->i_nlink += 1; -#endif + + inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, + ARRAY_SIZE(tgid_base_stuff)); dentry->d_op = &pid_dentry_operations; @@ -2816,10 +2834,9 @@ static struct dentry *proc_task_instantiate(struct inode *dir, inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; inode->i_flags|=S_IMMUTABLE; - inode->i_nlink = 4; -#ifdef CONFIG_SECURITY - inode->i_nlink += 1; -#endif + + inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, + ARRAY_SIZE(tid_base_stuff)); dentry->d_op = &pid_dentry_operations; -- cgit v1.2.3 From bbcdac0c20aa20d1daad41d9c138102b70e5aae4 Mon Sep 17 00:00:00 2001 From: Thomas Tuttle Date: Thu, 5 Jun 2008 22:46:58 -0700 Subject: pagemap: return map count, not reference count, in /proc/kpagecount Since pagemap is all about examining pages mapped into processes' memory spaces, it makes sense for kpagecount to return the map counts, not the reference counts. Signed-off-by: Thomas Tuttle Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 32dc14cd8900..5a16090a6d6e 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -726,7 +726,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, if (!ppage) pcount = 0; else - pcount = atomic_read(&ppage->_count); + pcount = page_mapcount(ppage); if (put_user(pcount, out++)) { ret = -EFAULT; -- cgit v1.2.3 From 4710d1ac4c491dd8a28f57946214c0b5fe73cc87 Mon Sep 17 00:00:00 2001 From: Thomas Tuttle Date: Thu, 5 Jun 2008 22:46:58 -0700 Subject: pagemap: return EINVAL, not EIO, for unaligned reads of kpagecount or kpageflags If the user tries to read from a position that is not a multiple of 8, or read a number of bytes that is not a multiple of 8, they have passed an invalid argument to read, for the purpose of reading these files. It's not an IO error because we didn't encounter any trouble finding the data they asked for. Signed-off-by: Thomas Tuttle Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 5a16090a6d6e..7e277f2ad466 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -716,7 +716,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, pfn = src / KPMSIZE; count = min_t(size_t, count, (max_pfn * KPMSIZE) - src); if (src & KPMMASK || count & KPMMASK) - return -EIO; + return -EINVAL; while (count > 0) { ppage = NULL; @@ -782,7 +782,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, pfn = src / KPMSIZE; count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); if (src & KPMMASK || count & KPMMASK) - return -EIO; + return -EINVAL; while (count > 0) { ppage = NULL; -- cgit v1.2.3 From aab2545fdd6641b76af0ae96456c4ca9d1e50dad Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 6 Jun 2008 11:31:39 -0700 Subject: uml: activate_mm: remove the dead PF_BORROWED_MM check use_mm() was changed to use switch_mm() instead of activate_mm(), since then nobody calls (and nobody should call) activate_mm() with PF_BORROWED_MM bit set. As Jeff Dike pointed out, we can also remove the "old != new" check, it is always true. Signed-off-by: Oleg Nesterov Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index b5253e77eb2f..0fb3117ddd93 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -591,10 +591,6 @@ static void use_mm(struct mm_struct *mm) atomic_inc(&mm->mm_count); tsk->mm = mm; tsk->active_mm = mm; - /* - * Note that on UML this *requires* PF_BORROWED_MM to be set, otherwise - * it won't work. Update it accordingly if you change it here - */ switch_mm(active_mm, mm, tsk); task_unlock(tsk); -- cgit v1.2.3 From dbdbb87636e882042cbe53d5d4eac94206f8db83 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 10 Jun 2008 21:21:56 +0000 Subject: [CIFS] Fix hang in mount when negprot causes server to kill tcp session Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/CHANGES | 5 +++++ fs/cifs/connect.c | 1 + 2 files changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 28e3d5c5fcac..1f3465201fdf 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -2,6 +2,11 @@ Version 1.53 ------------ DFS support added (Microsoft Distributed File System client support needed for referrals which enable a hierarchical name space among servers). +Disable temporary caching of mode bits to servers which do not support +storing of mode (e.g. Windows servers, when client mounts without cifsacl +mount option) and add new "dynperm" mount option to enable temporary caching +of mode (enable old behavior). Fix hang on mount caused when server crashes +tcp session during negotiate protocol. Version 1.52 ------------ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d49e274f8eba..e8fa46c7cff2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -653,6 +653,7 @@ multi_t2_fnd: spin_lock(&GlobalMid_Lock); server->tcpStatus = CifsExiting; spin_unlock(&GlobalMid_Lock); + wake_up_all(&server->response_q); /* don't exit until kthread_stop is called */ set_current_state(TASK_UNINTERRUPTIBLE); -- cgit v1.2.3 From 79ee9a8b2d328243488fee8b55bfacc822049a2a Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 10 Jun 2008 21:37:02 +0000 Subject: [CIFS] cifs: fix oops on mount when CONFIG_CIFS_DFS_UPCALL is enabled simple "mount -t cifs //xxx /mnt" oopsed on strlen of options http://kerneloops.org/guilty.php?guilty=cifs_get_sb&version=2.6.25-release&start=16711 \ 68&end=1703935&class=oops Signed-off-by: Marcin Slusarz Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5df93fd6303f..86b4d5f405ae 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -97,9 +97,6 @@ cifs_read_super(struct super_block *sb, void *data, { struct inode *inode; struct cifs_sb_info *cifs_sb; -#ifdef CONFIG_CIFS_DFS_UPCALL - int len; -#endif int rc = 0; /* BB should we make this contingent on mount parm? */ @@ -117,15 +114,17 @@ cifs_read_super(struct super_block *sb, void *data, * complex operation (mount), and in case of fail * just exit instead of doing mount and attempting * undo it if this copy fails?*/ - len = strlen(data); - cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); - if (cifs_sb->mountdata == NULL) { - kfree(sb->s_fs_info); - sb->s_fs_info = NULL; - return -ENOMEM; + if (data) { + int len = strlen(data); + cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); + if (cifs_sb->mountdata == NULL) { + kfree(sb->s_fs_info); + sb->s_fs_info = NULL; + return -ENOMEM; + } + strncpy(cifs_sb->mountdata, data, len + 1); + cifs_sb->mountdata[len] = '\0'; } - strncpy(cifs_sb->mountdata, data, len + 1); - cifs_sb->mountdata[len] = '\0'; #endif rc = cifs_mount(sb, cifs_sb, data, devname); -- cgit v1.2.3 From 2d518f84e5ecd1d71df0e6ac5176d212f68c27ce Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 12 Jun 2008 15:21:28 -0700 Subject: fat: relax the permission check of fat_setattr() New chmod() allows only acceptable permission, and if not acceptable, it returns -EPERM. Old one allows even if it can't store permission to on disk inode. But it seems too strict for users. E.g. https://bugzilla.redhat.com/show_bug.cgi?id=449080: With new one, rsync couldn't create the temporary file. So, this patch allows like old one, but now it doesn't change the permission if it can't store, and it returns 0. Also, this patch fixes missing check. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/file.c | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/fat/file.c b/fs/fat/file.c index 27cc1164ec36..771326b8047e 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -257,26 +257,34 @@ int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) } EXPORT_SYMBOL_GPL(fat_getattr); -static int fat_check_mode(const struct msdos_sb_info *sbi, struct inode *inode, - mode_t mode) +static int fat_sanitize_mode(const struct msdos_sb_info *sbi, + struct inode *inode, umode_t *mode_ptr) { - mode_t mask, req = mode & ~S_IFMT; + mode_t mask, perm; - if (S_ISREG(mode)) + /* + * Note, the basic check is already done by a caller of + * (attr->ia_mode & ~MSDOS_VALID_MODE) + */ + + if (S_ISREG(inode->i_mode)) mask = sbi->options.fs_fmask; else mask = sbi->options.fs_dmask; + perm = *mode_ptr & ~(S_IFMT | mask); + /* * Of the r and x bits, all (subject to umask) must be present. Of the * w bits, either all (subject to umask) or none must be present. */ - req &= ~mask; - if ((req & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO))) + if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO))) return -EPERM; - if ((req & S_IWUGO) && ((req & S_IWUGO) != (S_IWUGO & ~mask))) + if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) return -EPERM; + *mode_ptr &= S_IFMT | perm; + return 0; } @@ -299,7 +307,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); struct inode *inode = dentry->d_inode; - int mask, error = 0; + int error = 0; unsigned int ia_valid; lock_kernel(); @@ -332,12 +340,13 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) error = 0; goto out; } + if (((attr->ia_valid & ATTR_UID) && (attr->ia_uid != sbi->options.fs_uid)) || ((attr->ia_valid & ATTR_GID) && (attr->ia_gid != sbi->options.fs_gid)) || ((attr->ia_valid & ATTR_MODE) && - fat_check_mode(sbi, inode, attr->ia_mode) < 0)) + (attr->ia_mode & ~MSDOS_VALID_MODE))) error = -EPERM; if (error) { @@ -346,15 +355,16 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) goto out; } - error = inode_setattr(inode, attr); - if (error) - goto out; + /* + * We don't return -EPERM here. Yes, strange, but this is too + * old behavior. + */ + if (attr->ia_valid & ATTR_MODE) { + if (fat_sanitize_mode(sbi, inode, &attr->ia_mode) < 0) + attr->ia_valid &= ~ATTR_MODE; + } - if (S_ISDIR(inode->i_mode)) - mask = sbi->options.fs_dmask; - else - mask = sbi->options.fs_fmask; - inode->i_mode &= S_IFMT | (S_IRWXUGO & ~mask); + error = inode_setattr(inode, attr); out: unlock_kernel(); return error; -- cgit v1.2.3 From 2165009bdf63f79716a36ad545df14c3cdf958b7 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 12 Jun 2008 15:21:47 -0700 Subject: pagemap: pass mm into pagewalkers We need this at least for huge page detection for now, because powerpc needs the vm_area_struct to be able to determine whether a virtual address is referring to a huge page (its pmd_huge() doesn't work). It might also come in handy for some of the other users. Signed-off-by: Dave Hansen Acked-by: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 17403629e330..f0df3109343d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -315,9 +315,9 @@ struct mem_size_stats { }; static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, - void *private) + struct mm_walk *walk) { - struct mem_size_stats *mss = private; + struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = mss->vma; pte_t *pte, ptent; spinlock_t *ptl; @@ -365,19 +365,21 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, return 0; } -static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range }; - static int show_smap(struct seq_file *m, void *v) { struct vm_area_struct *vma = v; struct mem_size_stats mss; int ret; + struct mm_walk smaps_walk = { + .pmd_entry = smaps_pte_range, + .mm = vma->vm_mm, + .private = &mss, + }; memset(&mss, 0, sizeof mss); mss.vma = vma; if (vma->vm_mm && !is_vm_hugetlb_page(vma)) - walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end, - &smaps_walk, &mss); + walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); ret = show_map(m, v); if (ret) @@ -426,9 +428,9 @@ const struct file_operations proc_smaps_operations = { }; static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, - unsigned long end, void *private) + unsigned long end, struct mm_walk *walk) { - struct vm_area_struct *vma = private; + struct vm_area_struct *vma = walk->private; pte_t *pte, ptent; spinlock_t *ptl; struct page *page; @@ -452,8 +454,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, return 0; } -static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range }; - static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -476,11 +476,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, return -ESRCH; mm = get_task_mm(task); if (mm) { + static struct mm_walk clear_refs_walk; + memset(&clear_refs_walk, 0, sizeof(clear_refs_walk)); + clear_refs_walk.pmd_entry = clear_refs_pte_range; + clear_refs_walk.mm = mm; down_read(&mm->mmap_sem); - for (vma = mm->mmap; vma; vma = vma->vm_next) + for (vma = mm->mmap; vma; vma = vma->vm_next) { + clear_refs_walk.private = vma; if (!is_vm_hugetlb_page(vma)) - walk_page_range(mm, vma->vm_start, vma->vm_end, - &clear_refs_walk, vma); + walk_page_range(vma->vm_start, vma->vm_end, + &clear_refs_walk); + } flush_tlb_mm(mm); up_read(&mm->mmap_sem); mmput(mm); @@ -528,9 +534,9 @@ static int add_to_pagemap(unsigned long addr, u64 pfn, } static int pagemap_pte_hole(unsigned long start, unsigned long end, - void *private) + struct mm_walk *walk) { - struct pagemapread *pm = private; + struct pagemapread *pm = walk->private; unsigned long addr; int err = 0; for (addr = start; addr < end; addr += PAGE_SIZE) { @@ -548,9 +554,9 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte) } static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, - void *private) + struct mm_walk *walk) { - struct pagemapread *pm = private; + struct pagemapread *pm = walk->private; pte_t *pte; int err = 0; @@ -675,8 +681,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, * user buffer is tracked in "pm", and the walk * will stop when we hit the end of the buffer. */ - ret = walk_page_range(mm, start_vaddr, end_vaddr, - &pagemap_walk, &pm); + ret = walk_page_range(start_vaddr, end_vaddr, + &pagemap_walk); if (ret == PM_END_OF_BUFFER) ret = 0; /* don't need mmap_sem for these, but this looks cleaner */ -- cgit v1.2.3 From bcf8039ed45f56013c4afea5520bca7d909e5e61 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 12 Jun 2008 15:21:48 -0700 Subject: pagemap: fix large pages in pagemap We were walking right into huge page areas in the pagemap walker, and calling the pmds pmd_bad() and clearing them. That leaked huge pages. Bad. This patch at least works around that for now. It ignores huge pages in the pagemap walker for the time being, and won't leak those pages. Signed-off-by: Dave Hansen Acked-by: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f0df3109343d..ab8ccc9d14ff 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -553,24 +553,45 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte) return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); } +static unsigned long pte_to_pagemap_entry(pte_t pte) +{ + unsigned long pme = 0; + if (is_swap_pte(pte)) + pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) + | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; + else if (pte_present(pte)) + pme = PM_PFRAME(pte_pfn(pte)) + | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; + return pme; +} + static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { + struct vm_area_struct *vma; struct pagemapread *pm = walk->private; pte_t *pte; int err = 0; + /* find the first VMA at or above 'addr' */ + vma = find_vma(walk->mm, addr); for (; addr != end; addr += PAGE_SIZE) { u64 pfn = PM_NOT_PRESENT; - pte = pte_offset_map(pmd, addr); - if (is_swap_pte(*pte)) - pfn = PM_PFRAME(swap_pte_to_pagemap_entry(*pte)) - | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; - else if (pte_present(*pte)) - pfn = PM_PFRAME(pte_pfn(*pte)) - | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; - /* unmap so we're not in atomic when we copy to userspace */ - pte_unmap(pte); + + /* check to see if we've left 'vma' behind + * and need a new, higher one */ + if (vma && (addr >= vma->vm_end)) + vma = find_vma(walk->mm, addr); + + /* check that 'vma' actually covers this address, + * and that it isn't a huge page vma */ + if (vma && (vma->vm_start <= addr) && + !is_vm_hugetlb_page(vma)) { + pte = pte_offset_map(pmd, addr); + pfn = pte_to_pagemap_entry(*pte); + /* unmap before userspace copy */ + pte_unmap(pte); + } err = add_to_pagemap(addr, pfn, pm); if (err) return err; -- cgit v1.2.3 From e4f3ec063421bdbcb93330e72aa3eeedb6a0d85a Mon Sep 17 00:00:00 2001 From: Paul Collins Date: Sat, 14 Jun 2008 14:14:59 +1200 Subject: udf: restore UDFFS_DEBUG to being undefined by default Commit 706047a79725b585cf272fdefc234b31b6545c72, "udf: Fix compilation warnings when UDF debug is on" inadvertently (I assume) enabled debugging messages by default for UDF. This patch disables them again. Signed-off-by: Paul Collins Signed-off-by: Jan Kara --- fs/udf/udfdecl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 8fa9c2d70911..8ec865de5f13 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -16,7 +16,7 @@ #define UDF_PREALLOCATE #define UDF_DEFAULT_PREALLOC_BLOCKS 8 -#define UDFFS_DEBUG +#undef UDFFS_DEBUG #ifdef UDFFS_DEBUG #define udf_debug(f, a...) \ -- cgit v1.2.3 From 702773b16e83fcddc41e0019b8214d3c3cecedbe Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 16 Jun 2008 12:11:54 +0100 Subject: Include in fs/exec.c only for Alpha. We only need it for the /sbin/loader hack for OSF/1 executables, and we don't want to include it otherwise. While we're at it, remove the redundant '&& CONFIG_ARCH_SUPPORTS_AOUT' in the ifdef around that code. It's already dependent on __alpha__, and CONFIG_ARCH_SUPPORTS_AOUT is hard-coded to 'y' there. Signed-off-by: David Woodhouse Acked-by: Peter Korsgaard Signed-off-by: Linus Torvalds --- fs/exec.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 9448f1b50b4a..da94a6f05df3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -61,6 +60,11 @@ #include #endif +#ifdef __alpha__ +/* for /sbin/loader handling in search_binary_handler() */ +#include +#endif + int core_uses_pid; char core_pattern[CORENAME_MAX_SIZE] = "core"; int suid_dumpable = 0; @@ -1155,7 +1159,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) { int try,retval; struct linux_binfmt *fmt; -#if defined(__alpha__) && defined(CONFIG_ARCH_SUPPORTS_AOUT) +#ifdef __alpha__ /* handle /sbin/loader.. */ { struct exec * eh = (struct exec *) bprm->buf; -- cgit v1.2.3 From a9e0f5293d4999f93b469af4e70382db800a8204 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 16 Jun 2008 12:18:13 +0100 Subject: Remove last traces of a.out support from ELF loader. In commit d20894a23708c2af75966534f8e4dedb46d48db2 ("Remove a.out interpreter support in ELF loader"), Andi removed support for a.out interpreters from the ELF loader, which was only ever needed for the transition from a.out to ELF. This removes the last traces of that support, in particular the inclusion of . Signed-off-by: David Woodhouse Acked-by: Peter Korsgaard Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0fa95b198e6e..d48ff5f370f4 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -548,7 +547,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) struct { struct elfhdr elf_ex; struct elfhdr interp_elf_ex; - struct exec interp_ex; } *loc; loc = kmalloc(sizeof(*loc), GFP_KERNEL); @@ -680,7 +678,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) } /* Get the exec headers */ - loc->interp_ex = *((struct exec *)bprm->buf); loc->interp_elf_ex = *((struct elfhdr *)bprm->buf); break; } -- cgit v1.2.3 From 3878f110f71a0971ff7acc15dd6db711b6ef37c6 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 30 May 2008 15:30:49 -0700 Subject: ocfs2: Move the hb_ctl_path sysctl into the stack glue. ocfs2 needs to call out to the hb_ctl program at unmount for all cluster stacks. The first step is to move the hb_ctl_path sysctl out of the o2cb code and into the generic stack glue. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/nodemanager.c | 74 +----------------------------------- fs/ocfs2/cluster/nodemanager.h | 4 -- fs/ocfs2/stack_o2cb.c | 2 +- fs/ocfs2/stackglue.c | 85 ++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/stackglue.h | 2 + 5 files changed, 89 insertions(+), 78 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index cf9401e8cd0b..cfdb08b484ed 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -21,7 +21,6 @@ #include #include -#include #include #include "tcp.h" @@ -36,65 +35,6 @@ * cluster references throughout where nodes are looked up */ struct o2nm_cluster *o2nm_single_cluster = NULL; -#define OCFS2_MAX_HB_CTL_PATH 256 -static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; - -static ctl_table ocfs2_nm_table[] = { - { - .ctl_name = 1, - .procname = "hb_ctl_path", - .data = ocfs2_hb_ctl_path, - .maxlen = OCFS2_MAX_HB_CTL_PATH, - .mode = 0644, - .proc_handler = &proc_dostring, - .strategy = &sysctl_string, - }, - { .ctl_name = 0 } -}; - -static ctl_table ocfs2_mod_table[] = { - { - .ctl_name = FS_OCFS2_NM, - .procname = "nm", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = ocfs2_nm_table - }, - { .ctl_name = 0} -}; - -static ctl_table ocfs2_kern_table[] = { - { - .ctl_name = FS_OCFS2, - .procname = "ocfs2", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = ocfs2_mod_table - }, - { .ctl_name = 0} -}; - -static ctl_table ocfs2_root_table[] = { - { - .ctl_name = CTL_FS, - .procname = "fs", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = ocfs2_kern_table - }, - { .ctl_name = 0 } -}; - -static struct ctl_table_header *ocfs2_table_header = NULL; - -const char *o2nm_get_hb_ctl_path(void) -{ - return ocfs2_hb_ctl_path; -} -EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path); struct o2nm_node *o2nm_get_node_by_num(u8 node_num) { @@ -941,9 +881,6 @@ void o2nm_undepend_this_node(void) static void __exit exit_o2nm(void) { - if (ocfs2_table_header) - unregister_sysctl_table(ocfs2_table_header); - /* XXX sync with hb callbacks and shut down hb? */ o2net_unregister_hb_callbacks(); configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); @@ -964,16 +901,9 @@ static int __init init_o2nm(void) if (ret) goto out; - ocfs2_table_header = register_sysctl_table(ocfs2_root_table); - if (!ocfs2_table_header) { - printk(KERN_ERR "nodemanager: unable to register sysctl\n"); - ret = -ENOMEM; /* or something. */ - goto out_o2net; - } - ret = o2net_register_hb_callbacks(); if (ret) - goto out_sysctl; + goto out_o2net; config_group_init(&o2nm_cluster_group.cs_subsys.su_group); mutex_init(&o2nm_cluster_group.cs_subsys.su_mutex); @@ -990,8 +920,6 @@ static int __init init_o2nm(void) configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); out_callbacks: o2net_unregister_hb_callbacks(); -out_sysctl: - unregister_sysctl_table(ocfs2_table_header); out_o2net: o2net_exit(); out: diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h index 7c860361b8dd..c992ea0da4ad 100644 --- a/fs/ocfs2/cluster/nodemanager.h +++ b/fs/ocfs2/cluster/nodemanager.h @@ -33,10 +33,6 @@ #include #include -#define FS_OCFS2_NM 1 - -const char *o2nm_get_hb_ctl_path(void); - struct o2nm_node { spinlock_t nd_lock; struct config_item nd_item; diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index bbd1667aa7d3..fb26a7c69c47 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -338,7 +338,7 @@ static void o2hb_stop(const char *group) int ret; char *argv[5], *envp[3]; - argv[0] = (char *)o2nm_get_hb_ctl_path(); + argv[0] = (char *)ocfs2_get_hb_ctl_path(); argv[1] = "-K"; argv[2] = "-u"; argv[3] = (char *)group; diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 119f60cea9cc..fb9b8e0db260 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "ocfs2_fs.h" @@ -548,10 +549,92 @@ error: return ret; } +/* + * Sysctl bits + * + * The sysctl lives at /proc/sys/fs/ocfs2/nm/hb_ctl_path. The 'nm' doesn't + * make as much sense in a multiple cluster stack world, but it's safer + * and easier to preserve the name. + */ + +#define FS_OCFS2_NM 1 + +#define OCFS2_MAX_HB_CTL_PATH 256 +static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; + +static ctl_table ocfs2_nm_table[] = { + { + .ctl_name = 1, + .procname = "hb_ctl_path", + .data = ocfs2_hb_ctl_path, + .maxlen = OCFS2_MAX_HB_CTL_PATH, + .mode = 0644, + .proc_handler = &proc_dostring, + .strategy = &sysctl_string, + }, + { .ctl_name = 0 } +}; + +static ctl_table ocfs2_mod_table[] = { + { + .ctl_name = FS_OCFS2_NM, + .procname = "nm", + .data = NULL, + .maxlen = 0, + .mode = 0555, + .child = ocfs2_nm_table + }, + { .ctl_name = 0} +}; + +static ctl_table ocfs2_kern_table[] = { + { + .ctl_name = FS_OCFS2, + .procname = "ocfs2", + .data = NULL, + .maxlen = 0, + .mode = 0555, + .child = ocfs2_mod_table + }, + { .ctl_name = 0} +}; + +static ctl_table ocfs2_root_table[] = { + { + .ctl_name = CTL_FS, + .procname = "fs", + .data = NULL, + .maxlen = 0, + .mode = 0555, + .child = ocfs2_kern_table + }, + { .ctl_name = 0 } +}; + +static struct ctl_table_header *ocfs2_table_header = NULL; + +const char *ocfs2_get_hb_ctl_path(void) +{ + return ocfs2_hb_ctl_path; +} +EXPORT_SYMBOL_GPL(ocfs2_get_hb_ctl_path); + + +/* + * Initialization + */ + static int __init ocfs2_stack_glue_init(void) { strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); + ocfs2_table_header = register_sysctl_table(ocfs2_root_table); + if (!ocfs2_table_header) { + printk(KERN_ERR + "ocfs2 stack glue: unable to register sysctl\n"); + return -ENOMEM; /* or something. */ + } + return ocfs2_sysfs_init(); } @@ -559,6 +642,8 @@ static void __exit ocfs2_stack_glue_exit(void) { lproto = NULL; ocfs2_sysfs_exit(); + if (ocfs2_table_header) + unregister_sysctl_table(ocfs2_table_header); } MODULE_AUTHOR("Oracle"); diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index 005e4f170e0f..c9fb01ab6347 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -258,4 +258,6 @@ void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) /* Used by stack plugins */ int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); +const char *ocfs2_get_hb_ctl_path(void); + #endif /* STACKGLUE_H */ -- cgit v1.2.3 From 9f9a99f4eccc64650e932090cff0ebd07b81e334 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 30 May 2008 15:43:58 -0700 Subject: ocfs2: Move the call of ocfs2_hb_ctl into the stack glue. Take o2hb_stop() out of the o2cb code and make it part of the generic stack glue as ocfs2_leave_group(). This also allows us to remove the ocfs2_get_hb_ctl_path() function - everything to do with hb_ctl is now part of stackglue.c. o2cb no longer needs a ->hangup() function. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stack_o2cb.c | 38 -------------------------------------- fs/ocfs2/stackglue.c | 49 ++++++++++++++++++++++++++++++++++++++++--------- fs/ocfs2/stackglue.h | 1 - 3 files changed, 40 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index fb26a7c69c47..765ade5ee84a 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -333,43 +333,6 @@ static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn, return 0; } -static void o2hb_stop(const char *group) -{ - int ret; - char *argv[5], *envp[3]; - - argv[0] = (char *)ocfs2_get_hb_ctl_path(); - argv[1] = "-K"; - argv[2] = "-u"; - argv[3] = (char *)group; - argv[4] = NULL; - - mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]); - - /* minimal command environment taken from cpu_run_sbin_hotplug */ - envp[0] = "HOME=/"; - envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - envp[2] = NULL; - - ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); - if (ret < 0) - mlog_errno(ret); -} - -/* - * Hangup is a hack for tools compatibility. Older ocfs2-tools software - * expects the filesystem to call "ocfs2_hb_ctl" during unmount. This - * happens regardless of whether the DLM got started, so we can't do it - * in ocfs2_cluster_disconnect(). We bring the o2hb_stop() function into - * the glue and provide a "hangup" API for super.c to call. - * - * Other stacks will eventually provide a NULL ->hangup() pointer. - */ -static void o2cb_cluster_hangup(const char *group, int grouplen) -{ - o2hb_stop(group); -} - static int o2cb_cluster_this_node(unsigned int *node) { int node_num; @@ -388,7 +351,6 @@ static int o2cb_cluster_this_node(unsigned int *node) static struct ocfs2_stack_operations o2cb_stack_ops = { .connect = o2cb_cluster_connect, .disconnect = o2cb_cluster_disconnect, - .hangup = o2cb_cluster_hangup, .this_node = o2cb_cluster_this_node, .dlm_lock = o2cb_dlm_lock, .dlm_unlock = o2cb_dlm_unlock, diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index fb9b8e0db260..5f78ff4c76c7 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -34,11 +34,13 @@ #define OCFS2_STACK_PLUGIN_O2CB "o2cb" #define OCFS2_STACK_PLUGIN_USER "user" +#define OCFS2_MAX_HB_CTL_PATH 256 static struct ocfs2_locking_protocol *lproto; static DEFINE_SPINLOCK(ocfs2_stack_lock); static LIST_HEAD(ocfs2_stack_list); static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; +static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; /* * The stack currently in use. If not null, active_stack->sp_count > 0, @@ -363,6 +365,42 @@ int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, } EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect); +/* + * Leave the group for this filesystem. This is executed by a userspace + * program (stored in ocfs2_hb_ctl_path). + */ +static void ocfs2_leave_group(const char *group) +{ + int ret; + char *argv[5], *envp[3]; + + argv[0] = ocfs2_hb_ctl_path; + argv[1] = "-K"; + argv[2] = "-u"; + argv[3] = (char *)group; + argv[4] = NULL; + + /* minimal command environment taken from cpu_run_sbin_hotplug */ + envp[0] = "HOME=/"; + envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; + envp[2] = NULL; + + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); + if (ret < 0) { + printk(KERN_ERR + "ocfs2: Error %d running user helper " + "\"%s %s %s %s\"\n", + ret, argv[0], argv[1], argv[2], argv[3]); + } +} + +/* + * Hangup is a required post-umount. ocfs2-tools software expects the + * filesystem to call "ocfs2_hb_ctl" during unmount. This happens + * regardless of whether the DLM got started, so we can't do it + * in ocfs2_cluster_disconnect(). The ocfs2_leave_group() function does + * the actual work. + */ void ocfs2_cluster_hangup(const char *group, int grouplen) { BUG_ON(group == NULL); @@ -371,6 +409,8 @@ void ocfs2_cluster_hangup(const char *group, int grouplen) if (active_stack->sp_ops->hangup) active_stack->sp_ops->hangup(group, grouplen); + ocfs2_leave_group(group); + /* cluster_disconnect() was called with hangup_pending==1 */ ocfs2_stack_driver_put(); } @@ -559,9 +599,6 @@ error: #define FS_OCFS2_NM 1 -#define OCFS2_MAX_HB_CTL_PATH 256 -static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; - static ctl_table ocfs2_nm_table[] = { { .ctl_name = 1, @@ -613,12 +650,6 @@ static ctl_table ocfs2_root_table[] = { static struct ctl_table_header *ocfs2_table_header = NULL; -const char *ocfs2_get_hb_ctl_path(void) -{ - return ocfs2_hb_ctl_path; -} -EXPORT_SYMBOL_GPL(ocfs2_get_hb_ctl_path); - /* * Initialization diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index c9fb01ab6347..fe3fd2a12821 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -258,6 +258,5 @@ void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) /* Used by stack plugins */ int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); -const char *ocfs2_get_hb_ctl_path(void); #endif /* STACKGLUE_H */ -- cgit v1.2.3 From 2c39450b39880e162b3eb339672314101f58ee1a Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 30 May 2008 15:58:26 -0700 Subject: ocfs2: Remove ->hangup() from stack glue operations. The ->hangup() call was only used to execute ocfs2_hb_ctl. Now that the generic stack glue code handles this, the underlying stack drivers don't need to know about it. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/stack_o2cb.c | 3 +-- fs/ocfs2/stack_user.c | 3 +-- fs/ocfs2/stackglue.c | 5 +---- fs/ocfs2/stackglue.h | 18 +++--------------- 4 files changed, 6 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 765ade5ee84a..fcd120f1493a 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -317,8 +317,7 @@ out: return rc; } -static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn, - int hangup_pending) +static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn) { struct dlm_ctxt *dlm = conn->cc_lockspace; struct o2dlm_private *priv = conn->cc_private; diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 6b97d11f6bf8..c021280dd462 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -816,8 +816,7 @@ out: return rc; } -static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn, - int hangup_pending) +static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn) { dlm_release_lockspace(conn->cc_lockspace, 2); conn->cc_lockspace = NULL; diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 5f78ff4c76c7..10e149ae5e3a 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -352,7 +352,7 @@ int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, BUG_ON(conn == NULL); - ret = active_stack->sp_ops->disconnect(conn, hangup_pending); + ret = active_stack->sp_ops->disconnect(conn); /* XXX Should we free it anyway? */ if (!ret) { @@ -406,9 +406,6 @@ void ocfs2_cluster_hangup(const char *group, int grouplen) BUG_ON(group == NULL); BUG_ON(group[grouplen] != '\0'); - if (active_stack->sp_ops->hangup) - active_stack->sp_ops->hangup(group, grouplen); - ocfs2_leave_group(group); /* cluster_disconnect() was called with hangup_pending==1 */ diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index fe3fd2a12821..db56281dd1be 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -134,22 +134,10 @@ struct ocfs2_stack_operations { * be freed. Thus, a stack must not return from ->disconnect() * until it will no longer reference the conn pointer. * - * If hangup_pending is zero, ocfs2_cluster_disconnect() will also - * be dropping the reference on the module. + * Once this call returns, the stack glue will be dropping this + * connection's reference on the module. */ - int (*disconnect)(struct ocfs2_cluster_connection *conn, - int hangup_pending); - - /* - * ocfs2_cluster_hangup() exists for compatibility with older - * ocfs2 tools. Only the classic stack really needs it. As such - * ->hangup() is not required of all stacks. See the comment by - * ocfs2_cluster_hangup() for more details. - * - * Note that ocfs2_cluster_hangup() can only be called if - * hangup_pending was passed to ocfs2_cluster_disconnect(). - */ - void (*hangup)(const char *group, int grouplen); + int (*disconnect)(struct ocfs2_cluster_connection *conn); /* * ->this_node() returns the cluster's unique identifier for the -- cgit v1.2.3 From f948d56435fc1f7506f08866302ecd6e60b533dd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 17 Jun 2008 18:05:40 +0200 Subject: fuse: fix thinko in max I/O size calucation Use max not min to enforce a lower limit on the max I/O size. This bug was introduced by "fuse: fix max i/o size calculation" (commit e5d9a0df07484d6d191756878c974e4307fb24ce). Thanks to Brian Wang for noticing. Reported-by: Brian Wang Signed-off-by: Miklos Szeredi Acked-by: Szabolcs Szakacsits Signed-off-by: Linus Torvalds --- fs/fuse/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 43e99513334a..3141690558c8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -591,7 +591,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); fc->minor = arg->minor; fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; - fc->max_write = min_t(unsigned, 4096, fc->max_write); + fc->max_write = max_t(unsigned, 4096, fc->max_write); fc->conn_init = 1; } fuse_put_request(fc, req); @@ -667,7 +667,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->flags = d.flags; fc->user_id = d.user_id; fc->group_id = d.group_id; - fc->max_read = min_t(unsigned, 4096, d.max_read); + fc->max_read = max_t(unsigned, 4096, d.max_read); /* Used by get_root_inode() */ sb->s_fs_info = fc; -- cgit v1.2.3 From 2856922c158605514ec5974a03097eaec91f4c0d Mon Sep 17 00:00:00 2001 From: Frederic Bohe Date: Fri, 20 Jun 2008 11:48:48 -0400 Subject: Ext4: Fix online resize block group descriptor corruption This is the patch for the group descriptor table corruption during online resize pointed out by Theodore Tso. The problem was caused by the fact that the ext4 group descriptor can be either 32 or 64 bytes long. Only the 64 bytes structure was taken into account. Signed-off-by: Frederic Bohe Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9ecb92f68543..9ff7b1c04239 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -855,7 +855,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) */ /* Update group descriptor block for new group */ - gdp = (struct ext4_group_desc *)primary->b_data + gdb_off; + gdp = (struct ext4_group_desc *)((char *)primary->b_data + + gdb_off * EXT4_DESC_SIZE(sb)); ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ -- cgit v1.2.3 From 55d8538498f62ec72b5ba67aa386c7726f630475 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 Jun 2008 12:23:15 -0700 Subject: Fix performance regression on lmbench select benchmark Christian Borntraeger reported that reinstating cond_resched() with CONFIG_PREEMPT caused a performance regression on lmbench: For example select file 500: 23 microseconds 32 microseconds and that's really because we totally unnecessarily do the cond_resched() in the innermost loop of select(), which is just silly. This moves it out from the innermost loop (which only ever loops ove the bits in a single "unsigned long" anyway), which makes the performance regression go away. Reported-and-tested-by: Christian Borntraeger Signed-off-by: Linus Torvalds --- fs/select.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/select.c b/fs/select.c index 8dda969614a9..da0e88201c3a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -249,7 +249,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) retval++; } } - cond_resched(); } if (res_in) *rinp = res_in; @@ -257,6 +256,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) *routp = res_out; if (res_ex) *rexp = res_ex; + cond_resched(); } wait = NULL; if (retval || !*timeout || signal_pending(current)) -- cgit v1.2.3 From 33852a1f2bb014e4047a844556c0d76a2f790c37 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jun 2008 14:20:11 -0400 Subject: NFS: Reduce the NFS mount code stack usage. This appears to fix the Oops reported in http://bugzilla.kernel.org/show_bug.cgi?id=10826 Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 68 ++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2a4a024a4e7b..dac663dc5611 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1216,8 +1216,6 @@ static int nfs_validate_mount_data(void *options, { struct nfs_mount_data *data = (struct nfs_mount_data *)options; - memset(args, 0, sizeof(*args)); - if (data == NULL) goto out_no_data; @@ -1585,24 +1583,29 @@ static int nfs_get_sb(struct file_system_type *fs_type, { struct nfs_server *server = NULL; struct super_block *s; - struct nfs_fh mntfh; - struct nfs_parsed_mount_data data; + struct nfs_parsed_mount_data *data; + struct nfs_fh *mntfh; struct dentry *mntroot; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; struct nfs_sb_mountdata sb_mntdata = { .mntflags = flags, }; - int error; + int error = -ENOMEM; - security_init_mnt_opts(&data.lsm_opts); + data = kzalloc(sizeof(*data), GFP_KERNEL); + mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); + if (data == NULL || mntfh == NULL) + goto out_free_fh; + + security_init_mnt_opts(&data->lsm_opts); /* Validate the mount data */ - error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name); + error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); if (error < 0) goto out; /* Get a volume representation */ - server = nfs_create_server(&data, &mntfh); + server = nfs_create_server(data, mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); goto out; @@ -1630,16 +1633,16 @@ static int nfs_get_sb(struct file_system_type *fs_type, if (!s->s_root) { /* initial superblock/root creation */ - nfs_fill_super(s, &data); + nfs_fill_super(s, data); } - mntroot = nfs_get_root(s, &mntfh); + mntroot = nfs_get_root(s, mntfh); if (IS_ERR(mntroot)) { error = PTR_ERR(mntroot); goto error_splat_super; } - error = security_sb_set_mnt_opts(s, &data.lsm_opts); + error = security_sb_set_mnt_opts(s, &data->lsm_opts); if (error) goto error_splat_root; @@ -1649,9 +1652,12 @@ static int nfs_get_sb(struct file_system_type *fs_type, error = 0; out: - kfree(data.nfs_server.hostname); - kfree(data.mount_server.hostname); - security_free_mnt_opts(&data.lsm_opts); + kfree(data->nfs_server.hostname); + kfree(data->mount_server.hostname); + security_free_mnt_opts(&data->lsm_opts); +out_free_fh: + kfree(mntfh); + kfree(data); return error; out_err_nosb: @@ -1800,8 +1806,6 @@ static int nfs4_validate_mount_data(void *options, struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; char *c; - memset(args, 0, sizeof(*args)); - if (data == NULL) goto out_no_data; @@ -1959,26 +1963,31 @@ out_no_client_address: static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - struct nfs_parsed_mount_data data; + struct nfs_parsed_mount_data *data; struct super_block *s; struct nfs_server *server; - struct nfs_fh mntfh; + struct nfs_fh *mntfh; struct dentry *mntroot; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; struct nfs_sb_mountdata sb_mntdata = { .mntflags = flags, }; - int error; + int error = -ENOMEM; - security_init_mnt_opts(&data.lsm_opts); + data = kzalloc(sizeof(*data), GFP_KERNEL); + mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); + if (data == NULL || mntfh == NULL) + goto out_free_fh; + + security_init_mnt_opts(&data->lsm_opts); /* Validate the mount data */ - error = nfs4_validate_mount_data(raw_data, &data, dev_name); + error = nfs4_validate_mount_data(raw_data, data, dev_name); if (error < 0) goto out; /* Get a volume representation */ - server = nfs4_create_server(&data, &mntfh); + server = nfs4_create_server(data, mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); goto out; @@ -2009,13 +2018,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type, nfs4_fill_super(s); } - mntroot = nfs4_get_root(s, &mntfh); + mntroot = nfs4_get_root(s, mntfh); if (IS_ERR(mntroot)) { error = PTR_ERR(mntroot); goto error_splat_super; } - error = security_sb_set_mnt_opts(s, &data.lsm_opts); + error = security_sb_set_mnt_opts(s, &data->lsm_opts); if (error) goto error_splat_root; @@ -2025,10 +2034,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type, error = 0; out: - kfree(data.client_address); - kfree(data.nfs_server.export_path); - kfree(data.nfs_server.hostname); - security_free_mnt_opts(&data.lsm_opts); + kfree(data->client_address); + kfree(data->nfs_server.export_path); + kfree(data->nfs_server.hostname); + security_free_mnt_opts(&data->lsm_opts); +out_free_fh: + kfree(mntfh); + kfree(data); return error; out_free: -- cgit v1.2.3 From b7e2445737ff69cef892b6fd9cd71cae2c9e9515 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jun 2008 15:21:11 -0400 Subject: NFS: Fix filehandle size comparisons in the mount code Fix a sign issue in xdr_decode_fhstatus3() Fix incorrect comparison in nfs_validate_mount_data() Signed-off-by: Trond Myklebust --- fs/nfs/mount_clnt.c | 5 +++-- fs/nfs/super.c | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 49c7cd0502cc..779d2eb649c5 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -130,10 +130,11 @@ static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) { struct nfs_fh *fh = res->fh; + unsigned size; if ((res->status = ntohl(*p++)) == 0) { - int size = ntohl(*p++); - if (size <= NFS3_FHSIZE) { + size = ntohl(*p++); + if (size <= NFS3_FHSIZE && size != 0) { fh->size = size; memcpy(fh->data, p, size); } else diff --git a/fs/nfs/super.c b/fs/nfs/super.c index dac663dc5611..614efeed5437 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1249,13 +1249,13 @@ static int nfs_validate_mount_data(void *options, case 5: memset(data->context, 0, sizeof(data->context)); case 6: - if (data->flags & NFS_MOUNT_VER3) + if (data->flags & NFS_MOUNT_VER3) { + if (data->root.size > NFS3_FHSIZE || data->root.size == 0) + goto out_invalid_fh; mntfh->size = data->root.size; - else + } else mntfh->size = NFS2_FHSIZE; - if (mntfh->size > sizeof(mntfh->data)) - goto out_invalid_fh; memcpy(mntfh->data, data->root.data, mntfh->size); if (mntfh->size < sizeof(mntfh->data)) -- cgit v1.2.3 From 03fa9e84e5dc10aeacb0e4eb2f708cd9fc36a5b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 5 Jun 2008 16:02:35 -0400 Subject: NFS: nfs_updatepage(): don't mark page as dirty if an error occurred Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6d8ace3e3259..f333848fd3be 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -739,12 +739,13 @@ int nfs_updatepage(struct file *file, struct page *page, } status = nfs_writepage_setup(ctx, page, offset, count); - __set_page_dirty_nobuffers(page); + if (status < 0) + nfs_set_pageerror(page); + else + __set_page_dirty_nobuffers(page); dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", status, (long long)i_size_read(inode)); - if (status < 0) - nfs_set_pageerror(page); return status; } -- cgit v1.2.3 From 17c15da00c0e7289375ad57e8fea0c7892b74aa0 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 18 Jun 2008 11:30:40 -0500 Subject: [GFS2] BUG: unable to handle kernel paging request at ffff81002690e000 This patch fixes bugzilla bug bz448866: gfs2: BUG: unable to handle kernel paging request at ffff81002690e000. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 6387523a3153..3401628d742b 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -195,7 +195,7 @@ ulong_aligned: depending on architecture. I've experimented with several ways of writing this section such as using an else before the goto but this one seems to be the fastest. */ - while ((unsigned char *)plong < end - 1) { + while ((unsigned char *)plong < end - sizeof(unsigned long)) { prefetch(plong + 1); if (((*plong) & LBITMASK) != lskipval) break; -- cgit v1.2.3 From 5af4e7a0bea715f2dd7190859a43eb2258b1f388 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 24 Jun 2008 12:53:38 -0500 Subject: [GFS2] fix gfs2 block allocation (cleaned up) This patch fixes bz 450641. This patch changes the computation for zero_metapath_length(), which it renames to metapath_branch_start(). When you are extending the metadata tree, The indirect blocks that point to the new data block must either diverge from the existing tree either at the inode, or at the first indirect block. They can diverge at the first indirect block because the inode has room for 483 pointers while the indirect blocks have room for 509 pointers, so when the tree is grown, there is some free space in the first indirect block. What metapath_branch_start() now computes is the height where the first indirect block for the new data block is located. It can either be 1 (if the indirect block diverges from the inode) or 2 (if it diverges from the first indirect block). Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index c19184f2e70e..bec76b1c2bb0 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -246,15 +246,11 @@ static void find_metapath(const struct gfs2_sbd *sdp, u64 block, } -static inline unsigned int zero_metapath_length(const struct metapath *mp, - unsigned height) +static inline unsigned int metapath_branch_start(const struct metapath *mp) { - unsigned int i; - for (i = 0; i < height - 1; i++) { - if (mp->mp_list[i] != 0) - return i; - } - return height; + if (mp->mp_list[0] == 0) + return 2; + return 1; } /** @@ -436,7 +432,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *dibh = mp->mp_bh[0]; u64 bn, dblock = 0; - unsigned n, i, blks, alloced = 0, iblks = 0, zmpl = 0; + unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; unsigned dblks = 0; unsigned ptrs_per_blk; const unsigned end_of_metadata = height - 1; @@ -471,9 +467,8 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, /* Building up tree height */ state = ALLOC_GROW_HEIGHT; iblks = height - ip->i_height; - zmpl = zero_metapath_length(mp, height); - iblks -= zmpl; - iblks += height; + branch_start = metapath_branch_start(mp); + iblks += (height - branch_start); } } @@ -509,13 +504,13 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, sizeof(struct gfs2_meta_header)); *ptr = zero_bn; state = ALLOC_GROW_DEPTH; - for(i = zmpl; i < height; i++) { + for(i = branch_start; i < height; i++) { if (mp->mp_bh[i] == NULL) break; brelse(mp->mp_bh[i]); mp->mp_bh[i] = NULL; } - i = zmpl; + i = branch_start; } if (n == 0) break; -- cgit v1.2.3