diff options
| author | Laurent Vivier <Laurent.Vivier@bull.net> | 2008-04-29 01:02:46 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-29 08:06:23 -0700 | 
| commit | 48cf6061b30205b29b306bf9bc22dd6f0b091461 (patch) | |
| tree | bba7827177587a17160c29392651aa27aa7f98fd | |
| parent | 8c4dd6068221cd1d0d90490ace80eb4344914a8c (diff) | |
NBD: allow nbd to be used locally
This patch allows a Network Block Device to be mounted locally (nbd-client
connecting to nbd-server over 127.0.0.1).
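It creates a kthread to avoid the deadlock described in the NBD tools
documentation.  The block request function now only queues each request and
wakes the kthread, which performs the actual send on the socket.  So, if
nbd-client hangs waiting for pages, the kblockd thread can continue its work
and free pages.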
I have tested the patch to verify that it avoids the hang that always occurs
when writing to a localhost nbd connection.  I have also tested to verify that
no performance degradation results from the additional thread and queue.
Patch originally from Laurent Vivier.
Signed-off-by: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | drivers/block/nbd.c | 144 | ||||
| -rw-r--r-- | include/linux/nbd.h | 4 | 
2 files changed, 98 insertions, 50 deletions
| diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 60cc54368b66..8e33de6bea33 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -29,6 +29,7 @@  #include <linux/kernel.h>  #include <net/sock.h>  #include <linux/net.h> +#include <linux/kthread.h>  #include <asm/uaccess.h>  #include <asm/system.h> @@ -441,6 +442,85 @@ static void nbd_clear_que(struct nbd_device *lo)  } +static void nbd_handle_req(struct nbd_device *lo, struct request *req) +{ +	if (!blk_fs_request(req)) +		goto error_out; + +	nbd_cmd(req) = NBD_CMD_READ; +	if (rq_data_dir(req) == WRITE) { +		nbd_cmd(req) = NBD_CMD_WRITE; +		if (lo->flags & NBD_READ_ONLY) { +			printk(KERN_ERR "%s: Write on read-only\n", +					lo->disk->disk_name); +			goto error_out; +		} +	} + +	req->errors = 0; + +	mutex_lock(&lo->tx_lock); +	if (unlikely(!lo->sock)) { +		mutex_unlock(&lo->tx_lock); +		printk(KERN_ERR "%s: Attempted send on closed socket\n", +		       lo->disk->disk_name); +		req->errors++; +		nbd_end_request(req); +		return; +	} + +	lo->active_req = req; + +	if (nbd_send_req(lo, req) != 0) { +		printk(KERN_ERR "%s: Request send failed\n", +				lo->disk->disk_name); +		req->errors++; +		nbd_end_request(req); +	} else { +		spin_lock(&lo->queue_lock); +		list_add(&req->queuelist, &lo->queue_head); +		spin_unlock(&lo->queue_lock); +	} + +	lo->active_req = NULL; +	mutex_unlock(&lo->tx_lock); +	wake_up_all(&lo->active_wq); + +	return; + +error_out: +	req->errors++; +	nbd_end_request(req); +} + +static int nbd_thread(void *data) +{ +	struct nbd_device *lo = data; +	struct request *req; + +	set_user_nice(current, -20); +	while (!kthread_should_stop() || !list_empty(&lo->waiting_queue)) { +		/* wait for something to do */ +		wait_event_interruptible(lo->waiting_wq, +					 kthread_should_stop() || +					 !list_empty(&lo->waiting_queue)); + +		/* extract request */ +		if (list_empty(&lo->waiting_queue)) +			continue; + +		spin_lock_irq(&lo->queue_lock); +		req = list_entry(lo->waiting_queue.next, 
struct request, +				 queuelist); +		list_del_init(&req->queuelist); +		spin_unlock_irq(&lo->queue_lock); + +		/* handle request */ +		nbd_handle_req(lo, req); +	} +	return 0; +} +  /*   * We always wait for result of write, for now. It would be nice to make it optional   * in future @@ -456,65 +536,23 @@ static void do_nbd_request(struct request_queue * q)  		struct nbd_device *lo;  		blkdev_dequeue_request(req); + +		spin_unlock_irq(q->queue_lock); +  		dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",  				req->rq_disk->disk_name, req, req->cmd_type); -		if (!blk_fs_request(req)) -			goto error_out; -  		lo = req->rq_disk->private_data;  		BUG_ON(lo->magic != LO_MAGIC); -		nbd_cmd(req) = NBD_CMD_READ; -		if (rq_data_dir(req) == WRITE) { -			nbd_cmd(req) = NBD_CMD_WRITE; -			if (lo->flags & NBD_READ_ONLY) { -				printk(KERN_ERR "%s: Write on read-only\n", -						lo->disk->disk_name); -				goto error_out; -			} -		} - -		req->errors = 0; -		spin_unlock_irq(q->queue_lock); - -		mutex_lock(&lo->tx_lock); -		if (unlikely(!lo->sock)) { -			mutex_unlock(&lo->tx_lock); -			printk(KERN_ERR "%s: Attempted send on closed socket\n", -			       lo->disk->disk_name); -			req->errors++; -			nbd_end_request(req); -			spin_lock_irq(q->queue_lock); -			continue; -		} +		spin_lock_irq(&lo->queue_lock); +		list_add_tail(&req->queuelist, &lo->waiting_queue); +		spin_unlock_irq(&lo->queue_lock); -		lo->active_req = req; - -		if (nbd_send_req(lo, req) != 0) { -			printk(KERN_ERR "%s: Request send failed\n", -					lo->disk->disk_name); -			req->errors++; -			nbd_end_request(req); -		} else { -			spin_lock(&lo->queue_lock); -			list_add(&req->queuelist, &lo->queue_head); -			spin_unlock(&lo->queue_lock); -		} - -		lo->active_req = NULL; -		mutex_unlock(&lo->tx_lock); -		wake_up_all(&lo->active_wq); +		wake_up(&lo->waiting_wq);  		spin_lock_irq(q->queue_lock); -		continue; - -error_out: -		req->errors++; -		spin_unlock(q->queue_lock); -		nbd_end_request(req); -		
spin_lock(q->queue_lock);  	}  } @@ -524,6 +562,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file,  	struct nbd_device *lo = inode->i_bdev->bd_disk->private_data;  	int error;  	struct request sreq ; +	struct task_struct *thread;  	if (!capable(CAP_SYS_ADMIN))  		return -EPERM; @@ -606,7 +645,12 @@ static int nbd_ioctl(struct inode *inode, struct file *file,  	case NBD_DO_IT:  		if (!lo->file)  			return -EINVAL; +		thread = kthread_create(nbd_thread, lo, lo->disk->disk_name); +		if (IS_ERR(thread)) +			return PTR_ERR(thread); +		wake_up_process(thread);  		error = nbd_do_it(lo); +		kthread_stop(thread);  		if (error)  			return error;  		sock_shutdown(lo, 1); @@ -695,10 +739,12 @@ static int __init nbd_init(void)  		nbd_dev[i].file = NULL;  		nbd_dev[i].magic = LO_MAGIC;  		nbd_dev[i].flags = 0; +		INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);  		spin_lock_init(&nbd_dev[i].queue_lock);  		INIT_LIST_HEAD(&nbd_dev[i].queue_head);  		mutex_init(&nbd_dev[i].tx_lock);  		init_waitqueue_head(&nbd_dev[i].active_wq); +		init_waitqueue_head(&nbd_dev[i].waiting_wq);  		nbd_dev[i].blksize = 1024;  		nbd_dev[i].bytesize = 0;  		disk->major = NBD_MAJOR; diff --git a/include/linux/nbd.h b/include/linux/nbd.h index 986572081e19..69075517c511 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -56,9 +56,11 @@ struct nbd_device {  	int magic;  	spinlock_t queue_lock; -	struct list_head queue_head;/* Requests are added here...	*/ +	struct list_head queue_head;	/* Requests waiting result */  	struct request *active_req;  	wait_queue_head_t active_wq; +	struct list_head waiting_queue;	/* Requests to be sent */ +	wait_queue_head_t waiting_wq;  	struct mutex tx_lock;  	struct gendisk *disk; | 
