summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-02-17 16:12:34 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-17 16:12:34 -0800
commit038911597e17017cee55fe93d521164a27056866 (patch)
tree8f279a91de8237ce370a14d745940cccfd78ea07
parent66dc830d14a222c9214a8557e9feb1e4a67a3857 (diff)
parenta26f49926da938f47561f386be56a83dd37a496d (diff)
Merge branch 'lazytime' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull lazytime mount option support from Al Viro: "Lazytime stuff from tytso" * 'lazytime' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: ext4: add optimization for the lazytime mount option vfs: add find_inode_nowait() function vfs: add support for a lazytime mount option
-rw-r--r--fs/ext4/inode.c70
-rw-r--r--fs/ext4/super.c10
-rw-r--r--fs/fs-writeback.c62
-rw-r--r--fs/gfs2/file.c4
-rw-r--r--fs/inode.c106
-rw-r--r--fs/jfs/file.c2
-rw-r--r--fs/libfs.c2
-rw-r--r--fs/proc_namespace.c1
-rw-r--r--fs/sync.c8
-rw-r--r--include/linux/backing-dev.h1
-rw-r--r--include/linux/fs.h10
-rw-r--r--include/trace/events/ext4.h30
-rw-r--r--include/trace/events/writeback.h60
-rw-r--r--include/uapi/linux/fs.h4
-rw-r--r--mm/backing-dev.c10
15 files changed, 343 insertions, 37 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 28555f191b62..85404f15e53a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4174,6 +4174,65 @@ static int ext4_inode_blocks_set(handle_t *handle,
return 0;
}
+struct other_inode {
+ unsigned long orig_ino; /* inode whose write-out triggered the scan */
+ struct ext4_inode *raw_inode; /* candidate's raw inode in the block buffer */
+};
+
+static int other_inode_match(struct inode * inode, unsigned long ino,
+ void *data)
+{
+ struct other_inode *oi = (struct other_inode *) data;
+ /* Cheap unlocked filter; i_state is re-checked under i_lock below. */
+ if ((inode->i_ino != ino) ||
+ (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
+ I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
+ ((inode->i_state & I_DIRTY_TIME) == 0))
+ return 0; /* not a candidate: find_inode_nowait() keeps scanning */
+ spin_lock(&inode->i_lock);
+ if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
+ I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) &&
+ (inode->i_state & I_DIRTY_TIME)) {
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ /* Clear the lazytime flags first, then drop i_lock before copying. */
+ inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
+ spin_unlock(&inode->i_lock);
+ /* Copy the in-core timestamps into the raw inode, then re-checksum. */
+ spin_lock(&ei->i_raw_lock);
+ EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode);
+ EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode);
+ EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode);
+ ext4_inode_csum_set(inode, oi->raw_inode, ei);
+ spin_unlock(&ei->i_raw_lock);
+ trace_ext4_other_inode_update_time(inode, oi->orig_ino);
+ return -1; /* done: stop the search without returning the inode */
+ }
+ spin_unlock(&inode->i_lock);
+ return -1; /* state changed before i_lock was taken: stop the search */
+}
+
+/*
+ * Opportunistically write the lazily-dirtied timestamps of the other cached
+ * inodes sharing @orig_ino's inode table block; @buf is that block's data.
+ */
+static void ext4_update_other_inodes_time(struct super_block *sb,
+ unsigned long orig_ino, char *buf)
+{
+ struct other_inode oi;
+ unsigned long ino;
+ int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
+ int inode_size = EXT4_INODE_SIZE(sb);
+ /* Inode numbers are 1-based: a block's first inode is n * per_block + 1. */
+ oi.orig_ino = orig_ino;
+ ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1;
+ for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
+ if (ino == orig_ino)
+ continue; /* the caller has already filled in this slot */
+ oi.raw_inode = (struct ext4_inode *) buf;
+ (void) find_inode_nowait(sb, ino, other_inode_match, &oi);
+ }
+}
+
/*
* Post the struct inode info into an on-disk inode location in the
* buffer-cache. This gobbles the caller's reference to the
@@ -4283,10 +4342,11 @@ static int ext4_do_update_inode(handle_t *handle,
cpu_to_le16(ei->i_extra_isize);
}
}
-
ext4_inode_csum_set(inode, raw_inode, ei);
-
spin_unlock(&ei->i_raw_lock);
+ if (inode->i_sb->s_flags & MS_LAZYTIME)
+ ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,
+ bh->b_data);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
rc = ext4_handle_dirty_metadata(handle, NULL, bh);
@@ -4875,11 +4935,17 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
* If the inode is marked synchronous, we don't honour that here - doing
* so would cause a commit on atime updates, which we don't bother doing.
* We handle synchronous inodes at the highest possible level.
+ *
+ * If only the I_DIRTY_TIME flag is set, we can skip everything. If
+ * I_DIRTY_TIME and I_DIRTY_SYNC are set, the only inode fields we need
+ * to copy into the on-disk inode structure are the timestamp fields.
*/
void ext4_dirty_inode(struct inode *inode, int flags)
{
handle_t *handle;
+ if (flags == I_DIRTY_TIME)
+ return;
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle))
goto out;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 10e8c6b7ca08..1adac6868e6f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1126,6 +1126,7 @@ enum {
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
+ Opt_lazytime, Opt_nolazytime,
Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
@@ -1190,6 +1191,8 @@ static const match_table_t tokens = {
{Opt_dax, "dax"},
{Opt_stripe, "stripe=%u"},
{Opt_delalloc, "delalloc"},
+ {Opt_lazytime, "lazytime"},
+ {Opt_nolazytime, "nolazytime"},
{Opt_nodelalloc, "nodelalloc"},
{Opt_removed, "mblk_io_submit"},
{Opt_removed, "nomblk_io_submit"},
@@ -1448,6 +1451,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
case Opt_i_version:
sb->s_flags |= MS_I_VERSION;
return 1;
+ case Opt_lazytime:
+ sb->s_flags |= MS_LAZYTIME;
+ return 1;
+ case Opt_nolazytime:
+ sb->s_flags &= ~MS_LAZYTIME;
+ return 1;
}
for (m = ext4_mount_opts; m->token != Opt_err; m++)
@@ -5044,6 +5053,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
#endif
+ *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
kfree(orig_data);
return 0;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c399152de397..073657f755d4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -253,14 +253,19 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
return ret;
}
+#define EXPIRE_DIRTY_ATIME 0x0001
+
/*
* Move expired (dirtied before work->older_than_this) dirty inodes from
* @delaying_queue to @dispatch_queue.
*/
static int move_expired_inodes(struct list_head *delaying_queue,
struct list_head *dispatch_queue,
+ int flags,
struct wb_writeback_work *work)
{
+ unsigned long *older_than_this = NULL;
+ unsigned long expire_time;
LIST_HEAD(tmp);
struct list_head *pos, *node;
struct super_block *sb = NULL;
@@ -268,13 +273,21 @@ static int move_expired_inodes(struct list_head *delaying_queue,
int do_sb_sort = 0;
int moved = 0;
+ if ((flags & EXPIRE_DIRTY_ATIME) == 0)
+ older_than_this = work->older_than_this;
+ else if ((work->reason == WB_REASON_SYNC) == 0) {
+ expire_time = jiffies - (HZ * 86400);
+ older_than_this = &expire_time;
+ }
while (!list_empty(delaying_queue)) {
inode = wb_inode(delaying_queue->prev);
- if (work->older_than_this &&
- inode_dirtied_after(inode, *work->older_than_this))
+ if (older_than_this &&
+ inode_dirtied_after(inode, *older_than_this))
break;
list_move(&inode->i_wb_list, &tmp);
moved++;
+ if (flags & EXPIRE_DIRTY_ATIME)
+ set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
if (sb_is_blkdev_sb(inode->i_sb))
continue;
if (sb && sb != inode->i_sb)
@@ -315,9 +328,12 @@ out:
static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
{
int moved;
+
assert_spin_locked(&wb->list_lock);
list_splice_init(&wb->b_more_io, &wb->b_io);
- moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work);
+ moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
+ moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
+ EXPIRE_DIRTY_ATIME, work);
trace_writeback_queue_io(wb, work, moved);
}
@@ -441,6 +457,8 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* updates after data IO completion.
*/
redirty_tail(inode, wb);
+ } else if (inode->i_state & I_DIRTY_TIME) {
+ list_move(&inode->i_wb_list, &wb->b_dirty_time);
} else {
/* The inode is clean. Remove from writeback lists. */
list_del_init(&inode->i_wb_list);
@@ -487,7 +505,13 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
spin_lock(&inode->i_lock);
dirty = inode->i_state & I_DIRTY;
- inode->i_state &= ~I_DIRTY;
+ if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) &&
+ (inode->i_state & I_DIRTY_TIME)) ||
+ (inode->i_state & I_DIRTY_TIME_EXPIRED)) {
+ dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
+ trace_writeback_lazytime(inode);
+ }
+ inode->i_state &= ~dirty;
/*
* Paired with smp_mb() in __mark_inode_dirty(). This allows
@@ -507,8 +531,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
spin_unlock(&inode->i_lock);
+ if (dirty & I_DIRTY_TIME)
+ mark_inode_dirty_sync(inode);
/* Don't write the inode if only I_DIRTY_PAGES was set */
- if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
+ if (dirty & ~I_DIRTY_PAGES) {
int err = write_inode(inode, wbc);
if (ret == 0)
ret = err;
@@ -556,7 +582,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
* make sure inode is on some writeback list and leave it there unless
* we have completely cleaned the inode.
*/
- if (!(inode->i_state & I_DIRTY) &&
+ if (!(inode->i_state & I_DIRTY_ALL) &&
(wbc->sync_mode != WB_SYNC_ALL ||
!mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
goto out;
@@ -571,7 +597,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
* If inode is clean, remove it from writeback lists. Otherwise don't
* touch it. See comment above for explanation.
*/
- if (!(inode->i_state & I_DIRTY))
+ if (!(inode->i_state & I_DIRTY_ALL))
list_del_init(&inode->i_wb_list);
spin_unlock(&wb->list_lock);
inode_sync_complete(inode);
@@ -713,7 +739,7 @@ static long writeback_sb_inodes(struct super_block *sb,
wrote += write_chunk - wbc.nr_to_write;
spin_lock(&wb->list_lock);
spin_lock(&inode->i_lock);
- if (!(inode->i_state & I_DIRTY))
+ if (!(inode->i_state & I_DIRTY_ALL))
wrote++;
requeue_inode(inode, wb, &wbc);
inode_sync_complete(inode);
@@ -1151,16 +1177,20 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
* page->mapping->host, so the page-dirtying time is recorded in the internal
* blockdev inode.
*/
+#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
void __mark_inode_dirty(struct inode *inode, int flags)
{
struct super_block *sb = inode->i_sb;
struct backing_dev_info *bdi = NULL;
+ int dirtytime;
+
+ trace_writeback_mark_inode_dirty(inode, flags);
/*
* Don't do this for I_DIRTY_PAGES - that doesn't actually
* dirty the inode itself
*/
- if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
+ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) {
trace_writeback_dirty_inode_start(inode, flags);
if (sb->s_op->dirty_inode)
@@ -1168,6 +1198,9 @@ void __mark_inode_dirty(struct inode *inode, int flags)
trace_writeback_dirty_inode(inode, flags);
}
+ if (flags & I_DIRTY_INODE)
+ flags &= ~I_DIRTY_TIME;
+ dirtytime = flags & I_DIRTY_TIME;
/*
* Paired with smp_mb() in __writeback_single_inode() for the
@@ -1175,16 +1208,21 @@ void __mark_inode_dirty(struct inode *inode, int flags)
*/
smp_mb();
- if ((inode->i_state & flags) == flags)
+ if (((inode->i_state & flags) == flags) ||
+ (dirtytime && (inode->i_state & I_DIRTY_INODE)))
return;
if (unlikely(block_dump))
block_dump___mark_inode_dirty(inode);
spin_lock(&inode->i_lock);
+ if (dirtytime && (inode->i_state & I_DIRTY_INODE))
+ goto out_unlock_inode;
if ((inode->i_state & flags) != flags) {
const int was_dirty = inode->i_state & I_DIRTY;
+ if (flags & I_DIRTY_INODE)
+ inode->i_state &= ~I_DIRTY_TIME;
inode->i_state |= flags;
/*
@@ -1231,8 +1269,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
}
inode->dirtied_when = jiffies;
- list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
+ list_move(&inode->i_wb_list, dirtytime ?
+ &bdi->wb.b_dirty_time : &bdi->wb.b_dirty);
spin_unlock(&bdi->wb.list_lock);
+ trace_writeback_dirty_inode_enqueue(inode);
if (wakeup_bdi)
bdi_wakeup_thread_delayed(bdi);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index ec9c2d33477a..3e32bb8e2d7e 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -654,7 +654,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
{
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
- int sync_state = inode->i_state & I_DIRTY;
+ int sync_state = inode->i_state & I_DIRTY_ALL;
struct gfs2_inode *ip = GFS2_I(inode);
int ret = 0, ret1 = 0;
@@ -667,7 +667,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
if (!gfs2_is_jdata(ip))
sync_state &= ~I_DIRTY_PAGES;
if (datasync)
- sync_state &= ~I_DIRTY_SYNC;
+ sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME);
if (sync_state) {
ret = sync_inode_metadata(inode, 1);
diff --git a/fs/inode.c b/fs/inode.c
index 86bfaca724db..f00b16f45507 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -18,6 +18,7 @@
#include <linux/buffer_head.h> /* for inode_has_buffers */
#include <linux/ratelimit.h>
#include <linux/list_lru.h>
+#include <trace/events/writeback.h>
#include "internal.h"
/*
@@ -30,7 +31,7 @@
* inode_sb_list_lock protects:
* sb->s_inodes, inode->i_sb_list
* bdi->wb.list_lock protects:
- * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
+ * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list
* inode_hash_lock protects:
* inode_hashtable, inode->i_hash
*
@@ -403,7 +404,8 @@ static void inode_lru_list_add(struct inode *inode)
*/
void inode_add_lru(struct inode *inode)
{
- if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) &&
+ if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC |
+ I_FREEING | I_WILL_FREE)) &&
!atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE)
inode_lru_list_add(inode);
}
@@ -634,7 +636,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
spin_unlock(&inode->i_lock);
continue;
}
- if (inode->i_state & I_DIRTY && !kill_dirty) {
+ if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
spin_unlock(&inode->i_lock);
busy = 1;
continue;
@@ -1268,6 +1270,56 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
}
EXPORT_SYMBOL(ilookup);
+/**
+ * find_inode_nowait - find an inode in the inode cache
+ * @sb: super block of file system to search
+ * @hashval: hash value (usually inode number) to search for
+ * @match: callback used for comparisons between inodes
+ * @data: opaque data pointer to pass to @match
+ *
+ * Search for the inode specified by @hashval and @data in the inode
+ * cache, where the helper function @match will return 0 if the inode
+ * does not match, 1 if the inode does match, and -1 if the search
+ * should be stopped. The @match function must be responsible for
+ * taking the i_lock spin_lock and checking i_state for an inode being
+ * freed or being initialized, and incrementing the reference count
+ * before returning 1. It also must not sleep, since it is called with
+ * the inode_hash_lock spinlock held.
+ *
+ * This is an even more generalized version of ilookup5() when the
+ * function must never block --- find_inode() can block in
+ * __wait_on_freeing_inode() --- or when the caller can not increment
+ * the reference count because the resulting iput() might cause an
+ * inode eviction. The tradeoff is that the @match function must be
+ * very carefully implemented.
+ */
+struct inode *find_inode_nowait(struct super_block *sb,
+ unsigned long hashval,
+ int (*match)(struct inode *, unsigned long,
+ void *),
+ void *data)
+{
+ struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+ struct inode *inode, *ret_inode = NULL;
+ int mval;
+
+ spin_lock(&inode_hash_lock);
+ hlist_for_each_entry(inode, head, i_hash) {
+ if (inode->i_sb != sb)
+ continue;
+ mval = match(inode, hashval, data);
+ if (mval == 0)
+ continue;
+ if (mval == 1)
+ ret_inode = inode;
+ goto out;
+ }
+out:
+ spin_unlock(&inode_hash_lock);
+ return ret_inode;
+}
+EXPORT_SYMBOL(find_inode_nowait);
+
int insert_inode_locked(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
@@ -1418,11 +1470,20 @@ static void iput_final(struct inode *inode)
*/
void iput(struct inode *inode)
{
- if (inode) {
- BUG_ON(inode->i_state & I_CLEAR);
-
- if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock))
- iput_final(inode);
+ if (!inode)
+ return;
+ BUG_ON(inode->i_state & I_CLEAR);
+retry:
+ if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
+ if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
+ atomic_inc(&inode->i_count);
+ inode->i_state &= ~I_DIRTY_TIME;
+ spin_unlock(&inode->i_lock);
+ trace_writeback_lazytime_iput(inode);
+ mark_inode_dirty_sync(inode);
+ goto retry;
+ }
+ iput_final(inode);
}
}
EXPORT_SYMBOL(iput);
@@ -1481,14 +1542,9 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
return 0;
}
-/*
- * This does the actual work of updating an inodes time or version. Must have
- * had called mnt_want_write() before calling this.
- */
-static int update_time(struct inode *inode, struct timespec *time, int flags)
+int generic_update_time(struct inode *inode, struct timespec *time, int flags)
{
- if (inode->i_op->update_time)
- return inode->i_op->update_time(inode, time, flags);
+ int iflags = I_DIRTY_TIME;
if (flags & S_ATIME)
inode->i_atime = *time;
@@ -1498,9 +1554,27 @@ static int update_time(struct inode *inode, struct timespec *time, int flags)
inode->i_ctime = *time;
if (flags & S_MTIME)
inode->i_mtime = *time;
- mark_inode_dirty_sync(inode);
+
+ if (!(inode->i_sb->s_flags & MS_LAZYTIME) || (flags & S_VERSION))
+ iflags |= I_DIRTY_SYNC;
+ __mark_inode_dirty(inode, iflags);
return 0;
}
+EXPORT_SYMBOL(generic_update_time);
+
+/*
+ * Update an inode's times or version, preferring the filesystem's own
+ * ->update_time hook over generic_update_time(). The caller must have
+ * called mnt_want_write() beforehand.
+ */
+static int update_time(struct inode *inode, struct timespec *time, int flags)
+{
+ int (*fn)(struct inode *, struct timespec *, int) = generic_update_time;
+
+ if (inode->i_op->update_time)
+ fn = inode->i_op->update_time;
+ return fn(inode, time, flags);
+}
/**
* touch_atime - update the access time
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 33aa0cc1f8b8..10815f8dfd8b 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -39,7 +39,7 @@ int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
return rc;
mutex_lock(&inode->i_mutex);
- if (!(inode->i_state & I_DIRTY) ||
+ if (!(inode->i_state & I_DIRTY_ALL) ||
(datasync && !(inode->i_state & I_DIRTY_DATASYNC))) {
/* Make sure committed changes hit the disk */
jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1);
diff --git a/fs/libfs.c b/fs/libfs.c
index 005843ce5dbd..b2ffdb045be4 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -948,7 +948,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
mutex_lock(&inode->i_mutex);
ret = sync_mapping_buffers(inode->i_mapping);
- if (!(inode->i_state & I_DIRTY))
+ if (!(inode->i_state & I_DIRTY_ALL))
goto out;
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
goto out;
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 0f96f71ab32b..8db932da4009 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -44,6 +44,7 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb)
{ MS_SYNCHRONOUS, ",sync" },
{ MS_DIRSYNC, ",dirsync" },
{ MS_MANDLOCK, ",mand" },
+ { MS_LAZYTIME, ",lazytime" },
{ 0, NULL }
};
const struct proc_fs_info *fs_infop;
diff --git a/fs/sync.c b/fs/sync.c
index 01d9f18a70b5..fbc98ee62044 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -177,8 +177,16 @@ SYSCALL_DEFINE1(syncfs, int, fd)
*/
int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
{
+ struct inode *inode = file->f_mapping->host;
+
if (!file->f_op->fsync)
return -EINVAL;
+ if (!datasync && (inode->i_state & I_DIRTY_TIME)) {
+ spin_lock(&inode->i_lock);
+ inode->i_state &= ~I_DIRTY_TIME;
+ spin_unlock(&inode->i_lock);
+ mark_inode_dirty_sync(inode);
+ }
return file->f_op->fsync(file, start, end, datasync);
}
EXPORT_SYMBOL(vfs_fsync_range);
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index d94077fea1f8..aff923ae8c4b 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -55,6 +55,7 @@ struct bdi_writeback {
struct list_head b_dirty; /* dirty inodes */
struct list_head b_io; /* parked for writeback */
struct list_head b_more_io; /* parked for more writeback */
+ struct list_head b_dirty_time; /* time stamps are dirty */
spinlock_t list_lock; /* protects the b_* lists */
};
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0b5b146d0490..447932aed1e1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1790,8 +1790,12 @@ struct super_operations {
#define __I_DIO_WAKEUP 9
#define I_DIO_WAKEUP (1 << I_DIO_WAKEUP)
#define I_LINKABLE (1 << 10)
+#define I_DIRTY_TIME (1 << 11)
+#define __I_DIRTY_TIME_EXPIRED 12
+#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED)
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
+#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
extern void __mark_inode_dirty(struct inode *, int);
static inline void mark_inode_dirty(struct inode *inode)
@@ -1954,6 +1958,7 @@ extern int current_umask(void);
extern void ihold(struct inode * inode);
extern void iput(struct inode *);
+extern int generic_update_time(struct inode *, struct timespec *, int);
static inline struct inode *file_inode(const struct file *f)
{
@@ -2492,6 +2497,11 @@ extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
extern struct inode * iget_locked(struct super_block *, unsigned long);
+extern struct inode *find_inode_nowait(struct super_block *,
+ unsigned long,
+ int (*match)(struct inode *,
+ unsigned long, void *),
+ void *data);
extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
extern int insert_inode_locked(struct inode *);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 6cfb841fea7c..6e5abd6d38a2 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -73,6 +73,36 @@ struct extent_status;
{ FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})
+TRACE_EVENT(ext4_other_inode_update_time,
+ TP_PROTO(struct inode *inode, ino_t orig_ino),
+
+ TP_ARGS(inode, orig_ino),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( ino_t, orig_ino )
+ __field( uid_t, uid )
+ __field( gid_t, gid )
+ __field( __u16, mode )
+ ),
+
+ TP_fast_assign(
+ __entry->orig_ino = orig_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->uid = i_uid_read(inode);
+ __entry->gid = i_gid_read(inode);
+ __entry->mode = inode->i_mode;
+ ),
+
+ TP_printk("dev %d,%d orig_ino %lu ino %lu mode 0%o uid %u gid %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->orig_ino,
+ (unsigned long) __entry->ino, __entry->mode,
+ __entry->uid, __entry->gid)
+);
+
TRACE_EVENT(ext4_free_inode,
TP_PROTO(struct inode *inode),
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 0e9310905413..5a14ead59696 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -18,6 +18,8 @@
{I_FREEING, "I_FREEING"}, \
{I_CLEAR, "I_CLEAR"}, \
{I_SYNC, "I_SYNC"}, \
+ {I_DIRTY_TIME, "I_DIRTY_TIME"}, \
+ {I_DIRTY_TIME_EXPIRED, "I_DIRTY_TIME_EXPIRED"}, \
{I_REFERENCED, "I_REFERENCED"} \
)
@@ -68,6 +70,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
TP_STRUCT__entry (
__array(char, name, 32)
__field(unsigned long, ino)
+ __field(unsigned long, state)
__field(unsigned long, flags)
),
@@ -78,16 +81,25 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
strncpy(__entry->name,
bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
__entry->ino = inode->i_ino;
+ __entry->state = inode->i_state;
__entry->flags = flags;
),
- TP_printk("bdi %s: ino=%lu flags=%s",
+ TP_printk("bdi %s: ino=%lu state=%s flags=%s",
__entry->name,
__entry->ino,
+ show_inode_state(__entry->state),
show_inode_state(__entry->flags)
)
);
+DEFINE_EVENT(writeback_dirty_inode_template, writeback_mark_inode_dirty,
+
+ TP_PROTO(struct inode *inode, int flags),
+
+ TP_ARGS(inode, flags)
+);
+
DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start,
TP_PROTO(struct inode *inode, int flags),
@@ -596,6 +608,52 @@ DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode,
TP_ARGS(inode, wbc, nr_to_write)
);
+DECLARE_EVENT_CLASS(writeback_lazytime_template,
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field(unsigned long, ino )
+ __field(unsigned long, state )
+ __field( __u16, mode )
+ __field(unsigned long, dirtied_when )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->state = inode->i_state;
+ __entry->mode = inode->i_mode;
+ __entry->dirtied_when = inode->dirtied_when;
+ ),
+
+ TP_printk("dev %d,%d ino %lu dirtied %lu state %s mode 0%o",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino, __entry->dirtied_when,
+ show_inode_state(__entry->state), __entry->mode)
+);
+
+DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime,
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode)
+);
+
+DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime_iput,
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode)
+);
+
+DEFINE_EVENT(writeback_lazytime_template, writeback_dirty_inode_enqueue,
+
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode)
+);
+
#endif /* _TRACE_WRITEBACK_H */
/* This part must be outside protection */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 3735fa0a6784..9b964a5920af 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -90,6 +90,7 @@ struct inodes_stat_t {
#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
#define MS_I_VERSION (1<<23) /* Update inode I_version field */
#define MS_STRICTATIME (1<<24) /* Always perform atime updates */
+#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
/* These sb flags are internal to the kernel */
#define MS_NOSEC (1<<28)
@@ -100,7 +101,8 @@ struct inodes_stat_t {
/*
* Superblock flags that can be altered by MS_REMOUNT
*/
-#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION)
+#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
+ MS_LAZYTIME)
/*
* Old magic mount flag and mask
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 7690ec77c722..6dc4580df2af 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -49,10 +49,10 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
unsigned long background_thresh;
unsigned long dirty_thresh;
unsigned long bdi_thresh;
- unsigned long nr_dirty, nr_io, nr_more_io;
+ unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
struct inode *inode;
- nr_dirty = nr_io = nr_more_io = 0;
+ nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
spin_lock(&wb->list_lock);
list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
nr_dirty++;
@@ -60,6 +60,9 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
nr_io++;
list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
nr_more_io++;
+ list_for_each_entry(inode, &wb->b_dirty_time, i_wb_list)
+ if (inode->i_state & I_DIRTY_TIME)
+ nr_dirty_time++;
spin_unlock(&wb->list_lock);
global_dirty_limits(&background_thresh, &dirty_thresh);
@@ -78,6 +81,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
"b_dirty: %10lu\n"
"b_io: %10lu\n"
"b_more_io: %10lu\n"
+ "b_dirty_time: %10lu\n"
"bdi_list: %10u\n"
"state: %10lx\n",
(unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
@@ -91,6 +95,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
nr_dirty,
nr_io,
nr_more_io,
+ nr_dirty_time,
!list_empty(&bdi->bdi_list), bdi->state);
#undef K
@@ -380,6 +385,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
INIT_LIST_HEAD(&wb->b_dirty);
INIT_LIST_HEAD(&wb->b_io);
INIT_LIST_HEAD(&wb->b_more_io);
+ INIT_LIST_HEAD(&wb->b_dirty_time);
spin_lock_init(&wb->list_lock);
INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
}