diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-18 16:50:28 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-18 16:50:28 -0700 | 
| commit | 3f80fbff5f1ef8a842bbe5343bbc9ddad883f25c (patch) | |
| tree | 259f5be0f203b5c7e97837a1957f461abcfef5d0 | |
| parent | fce519588acfac249e8fdc1f5016c73d617de315 (diff) | |
| parent | 24307aa1e707b31613be92deaba7990e16bc1aec (diff) | |
Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2
* 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2:
  configfs: Fix race between configfs_readdir() and configfs_d_iput()
  configfs: Don't try to d_delete() negative dentries.
  ocfs2/dlm: Target node death during resource migration leads to thread spin
  ocfs2: Skip mount recovery for hard-ro mounts
  ocfs2/cluster: Heartbeat mismatch message improved
  ocfs2/cluster: Increase the live threshold for global heartbeat
  ocfs2/dlm: Use negotiated o2dlm protocol version
  ocfs2: skip existing hole when removing the last extent_rec in punching-hole codes.
  ocfs2: Initialize data_ac (might be used uninitialized)
| -rw-r--r-- | fs/configfs/dir.c | 39 | ||||
| -rw-r--r-- | fs/ocfs2/cluster/heartbeat.c | 61 | ||||
| -rw-r--r-- | fs/ocfs2/dir.c | 2 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmdomain.c | 3 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 3 | ||||
| -rw-r--r-- | fs/ocfs2/file.c | 12 | ||||
| -rw-r--r-- | fs/ocfs2/journal.c | 3 | 
7 files changed, 96 insertions, 27 deletions
| diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 3313dd19f543..9a37a9b6de3a 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -53,11 +53,14 @@ DEFINE_SPINLOCK(configfs_dirent_lock);  static void configfs_d_iput(struct dentry * dentry,  			    struct inode * inode)  { -	struct configfs_dirent * sd = dentry->d_fsdata; +	struct configfs_dirent *sd = dentry->d_fsdata;  	if (sd) {  		BUG_ON(sd->s_dentry != dentry); +		/* Coordinate with configfs_readdir */ +		spin_lock(&configfs_dirent_lock);  		sd->s_dentry = NULL; +		spin_unlock(&configfs_dirent_lock);  		configfs_put(sd);  	}  	iput(inode); @@ -689,7 +692,8 @@ static int create_default_group(struct config_group *parent_group,  			sd = child->d_fsdata;  			sd->s_type |= CONFIGFS_USET_DEFAULT;  		} else { -			d_delete(child); +			BUG_ON(child->d_inode); +			d_drop(child);  			dput(child);  		}  	} @@ -1545,7 +1549,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir  	struct configfs_dirent * parent_sd = dentry->d_fsdata;  	struct configfs_dirent *cursor = filp->private_data;  	struct list_head *p, *q = &cursor->s_sibling; -	ino_t ino; +	ino_t ino = 0;  	int i = filp->f_pos;  	switch (i) { @@ -1573,6 +1577,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir  				struct configfs_dirent *next;  				const char * name;  				int len; +				struct inode *inode = NULL;  				next = list_entry(p, struct configfs_dirent,  						   s_sibling); @@ -1581,9 +1586,28 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir  				name = configfs_get_name(next);  				len = strlen(name); -				if (next->s_dentry) -					ino = next->s_dentry->d_inode->i_ino; -				else + +				/* +				 * We'll have a dentry and an inode for +				 * PINNED items and for open attribute +				 * files.  We lock here to prevent a race +				 * with configfs_d_iput() clearing +				 * s_dentry before calling iput(). 
+				 * +				 * Why do we go to the trouble?  If +				 * someone has an attribute file open, +				 * the inode number should match until +				 * they close it.  Beyond that, we don't +				 * care. +				 */ +				spin_lock(&configfs_dirent_lock); +				dentry = next->s_dentry; +				if (dentry) +					inode = dentry->d_inode; +				if (inode) +					ino = inode->i_ino; +				spin_unlock(&configfs_dirent_lock); +				if (!inode)  					ino = iunique(configfs_sb, 2);  				if (filldir(dirent, name, len, filp->f_pos, ino, @@ -1683,7 +1707,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)  		err = configfs_attach_group(sd->s_element, &group->cg_item,  					    dentry);  		if (err) { -			d_delete(dentry); +			BUG_ON(dentry->d_inode); +			d_drop(dentry);  			dput(dentry);  		} else {  			spin_lock(&configfs_dirent_lock); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 643720209a98..9a3e6bbff27b 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -539,25 +539,41 @@ static int o2hb_verify_crc(struct o2hb_region *reg,  /* We want to make sure that nobody is heartbeating on top of us --   * this will help detect an invalid configuration. 
*/ -static int o2hb_check_last_timestamp(struct o2hb_region *reg) +static void o2hb_check_last_timestamp(struct o2hb_region *reg)  { -	int node_num, ret;  	struct o2hb_disk_slot *slot;  	struct o2hb_disk_heartbeat_block *hb_block; +	char *errstr; -	node_num = o2nm_this_node(); - -	ret = 1; -	slot = &reg->hr_slots[node_num]; +	slot = &reg->hr_slots[o2nm_this_node()];  	/* Don't check on our 1st timestamp */ -	if (slot->ds_last_time) { -		hb_block = slot->ds_raw_block; +	if (!slot->ds_last_time) +		return; -		if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time) -			ret = 0; -	} +	hb_block = slot->ds_raw_block; +	if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time && +	    le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation && +	    hb_block->hb_node == slot->ds_node_num) +		return; -	return ret; +#define ERRSTR1		"Another node is heartbeating on device" +#define ERRSTR2		"Heartbeat generation mismatch on device" +#define ERRSTR3		"Heartbeat sequence mismatch on device" + +	if (hb_block->hb_node != slot->ds_node_num) +		errstr = ERRSTR1; +	else if (le64_to_cpu(hb_block->hb_generation) != +		 slot->ds_last_generation) +		errstr = ERRSTR2; +	else +		errstr = ERRSTR3; + +	mlog(ML_ERROR, "%s (%s): expected(%u:0x%llx, 0x%llx), " +	     "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_dev_name, +	     slot->ds_node_num, (unsigned long long)slot->ds_last_generation, +	     (unsigned long long)slot->ds_last_time, hb_block->hb_node, +	     (unsigned long long)le64_to_cpu(hb_block->hb_generation), +	     (unsigned long long)le64_to_cpu(hb_block->hb_seq));  }  static inline void o2hb_prepare_block(struct o2hb_region *reg, @@ -983,9 +999,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)  	/* With an up to date view of the slots, we can check that no  	 * other node has been improperly configured to heartbeat in  	 * our slot. 
*/ -	if (!o2hb_check_last_timestamp(reg)) -		mlog(ML_ERROR, "Device \"%s\": another node is heartbeating " -		     "in our slot!\n", reg->hr_dev_name); +	o2hb_check_last_timestamp(reg);  	/* fill in the proper info for our next heartbeat */  	o2hb_prepare_block(reg, reg->hr_generation); @@ -999,8 +1013,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)  	}  	i = -1; -	while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { - +	while((i = find_next_bit(configured_nodes, +				 O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {  		change |= o2hb_check_slot(reg, &reg->hr_slots[i]);  	} @@ -1690,6 +1704,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,  	struct file *filp = NULL;  	struct inode *inode = NULL;  	ssize_t ret = -EINVAL; +	int live_threshold;  	if (reg->hr_bdev)  		goto out; @@ -1766,8 +1781,18 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,  	 * A node is considered live after it has beat LIVE_THRESHOLD  	 * times.  We're not steady until we've given them a chance  	 * _after_ our first read. +	 * The default threshold is bare minimum so as to limit the delay +	 * during mounts. For global heartbeat, the threshold doubled for the +	 * first region.  	 
*/ -	atomic_set(&reg->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1); +	live_threshold = O2HB_LIVE_THRESHOLD; +	if (o2hb_global_heartbeat_active()) { +		spin_lock(&o2hb_live_lock); +		if (o2hb_pop_count(&o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1) +			live_threshold <<= 1; +		spin_unlock(&o2hb_live_lock); +	} +	atomic_set(&reg->hr_steady_iterations, live_threshold + 1);  	hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s",  			      reg->hr_item.ci_name); diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 9fe5b8fd658f..8582e3f4f120 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2868,7 +2868,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,  		bytes = blocks_wanted << sb->s_blocksize_bits;  	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);  	struct ocfs2_inode_info *oi = OCFS2_I(dir); -	struct ocfs2_alloc_context *data_ac; +	struct ocfs2_alloc_context *data_ac = NULL;  	struct ocfs2_alloc_context *meta_ac = NULL;  	struct buffer_head *dirdata_bh = NULL;  	struct buffer_head *dx_root_bh = NULL; diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 7540a492eaba..3b179d6cbde0 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1614,7 +1614,8 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)  	spin_unlock(&dlm->spinlock);  	/* Support for global heartbeat and node info was added in 1.1 */ -	if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) { +	if (dlm->dlm_locking_proto.pv_major > 1 || +	    dlm->dlm_locking_proto.pv_minor > 0) {  		status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map);  		if (status) {  			mlog_errno(status); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index fede57ed005f..84d166328cf7 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2574,6 +2574,9 @@ fail:  		res->state &= ~DLM_LOCK_RES_MIGRATING;  		wake = 1;  		spin_unlock(&res->spinlock); +		if (dlm_is_host_down(ret)) +			dlm_wait_for_node_death(dlm, target, +						
DLM_NODE_DEATH_WAIT_MAX);  		goto leave;  	} diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 41565ae52856..89659d6dc206 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1607,6 +1607,9 @@ static void ocfs2_calc_trunc_pos(struct inode *inode,  	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);  	if (le32_to_cpu(rec->e_cpos) >= trunc_start) { +		/* +		 * remove an entire extent record. +		 */  		*trunc_cpos = le32_to_cpu(rec->e_cpos);  		/*  		 * Skip holes if any. @@ -1617,7 +1620,16 @@ static void ocfs2_calc_trunc_pos(struct inode *inode,  		*blkno = le64_to_cpu(rec->e_blkno);  		*trunc_end = le32_to_cpu(rec->e_cpos);  	} else if (range > trunc_start) { +		/* +		 * remove a partial extent record, which means we're +		 * removing the last extent record. +		 */  		*trunc_cpos = trunc_start; +		/* +		 * skip hole if any. +		 */ +		if (range < *trunc_end) +			*trunc_end = range;  		*trunc_len = *trunc_end - trunc_start;  		coff = trunc_start - le32_to_cpu(rec->e_cpos);  		*blkno = le64_to_cpu(rec->e_blkno) + diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index b141a44605ca..295d56454e8b 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1260,6 +1260,9 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)  {  	struct ocfs2_journal *journal = osb->journal; +	if (ocfs2_is_hard_readonly(osb)) +		return; +  	/* No need to queue up our truncate_log as regular cleanup will catch  	 * that */  	ocfs2_queue_recovery_completion(journal, osb->slot_num, | 
