summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/aoe/aoeblk.c14
-rw-r--r--drivers/block/brd.c4
-rw-r--r--drivers/block/cciss.c76
-rw-r--r--drivers/block/cciss.h1
-rw-r--r--drivers/block/cpqarray.c2
-rw-r--r--drivers/block/drbd/drbd_int.h2
-rw-r--r--drivers/block/drbd/drbd_req.c8
-rw-r--r--drivers/block/loop.c251
-rw-r--r--drivers/block/nbd.c69
-rw-r--r--drivers/block/pktcdvd.c11
-rw-r--r--drivers/block/ps3vram.c6
-rw-r--r--drivers/block/umem.c4
-rw-r--r--drivers/block/virtio_blk.c30
-rw-r--r--drivers/block/xen-blkback/blkback.c130
-rw-r--r--drivers/block/xen-blkback/common.h103
-rw-r--r--drivers/block/xen-blkback/xenbus.c130
-rw-r--r--drivers/block/xen-blkfront.c123
17 files changed, 647 insertions, 317 deletions
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index c01795d00aa5..321de7b6c442 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -160,7 +160,7 @@ aoeblk_release(struct gendisk *disk, fmode_t mode)
return 0;
}
-static int
+static void
aoeblk_make_request(struct request_queue *q, struct bio *bio)
{
struct sk_buff_head queue;
@@ -173,25 +173,25 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
if (bio == NULL) {
printk(KERN_ERR "aoe: bio is NULL\n");
BUG();
- return 0;
+ return;
}
d = bio->bi_bdev->bd_disk->private_data;
if (d == NULL) {
printk(KERN_ERR "aoe: bd_disk->private_data is NULL\n");
BUG();
bio_endio(bio, -ENXIO);
- return 0;
+ return;
} else if (bio->bi_io_vec == NULL) {
printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
BUG();
bio_endio(bio, -ENXIO);
- return 0;
+ return;
}
buf = mempool_alloc(d->bufpool, GFP_NOIO);
if (buf == NULL) {
printk(KERN_INFO "aoe: buf allocation failure\n");
bio_endio(bio, -ENOMEM);
- return 0;
+ return;
}
memset(buf, 0, sizeof(*buf));
INIT_LIST_HEAD(&buf->bufs);
@@ -212,7 +212,7 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
spin_unlock_irqrestore(&d->lock, flags);
mempool_free(buf, d->bufpool);
bio_endio(bio, -ENXIO);
- return 0;
+ return;
}
list_add_tail(&buf->bufs, &d->bufq);
@@ -223,8 +223,6 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
spin_unlock_irqrestore(&d->lock, flags);
aoenet_xmit(&queue);
-
- return 0;
}
static int
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index dba1c32e1ddf..d22119d49e53 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -323,7 +323,7 @@ out:
return err;
}
-static int brd_make_request(struct request_queue *q, struct bio *bio)
+static void brd_make_request(struct request_queue *q, struct bio *bio)
{
struct block_device *bdev = bio->bi_bdev;
struct brd_device *brd = bdev->bd_disk->private_data;
@@ -359,8 +359,6 @@ static int brd_make_request(struct request_queue *q, struct bio *bio)
out:
bio_endio(bio, err);
-
- return 0;
}
#ifdef CONFIG_BLK_DEV_XIP
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 8f4ef656a1af..486f94ef24d4 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -68,6 +68,10 @@ static int cciss_tape_cmds = 6;
module_param(cciss_tape_cmds, int, 0644);
MODULE_PARM_DESC(cciss_tape_cmds,
"number of commands to allocate for tape devices (default: 6)");
+static int cciss_simple_mode;
+module_param(cciss_simple_mode, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(cciss_simple_mode,
+ "Use 'simple mode' rather than 'performant mode'");
static DEFINE_MUTEX(cciss_mutex);
static struct proc_dir_entry *proc_cciss;
@@ -176,6 +180,7 @@ static void cciss_geometry_inquiry(ctlr_info_t *h, int logvol,
unsigned int block_size, InquiryData_struct *inq_buff,
drive_info_struct *drv);
static void __devinit cciss_interrupt_mode(ctlr_info_t *);
+static int __devinit cciss_enter_simple_mode(struct ctlr_info *h);
static void start_io(ctlr_info_t *h);
static int sendcmd_withirq(ctlr_info_t *h, __u8 cmd, void *buff, size_t size,
__u8 page_code, unsigned char scsi3addr[],
@@ -388,7 +393,7 @@ static void cciss_seq_show_header(struct seq_file *seq)
h->product_name,
(unsigned long)h->board_id,
h->firm_ver[0], h->firm_ver[1], h->firm_ver[2],
- h->firm_ver[3], (unsigned int)h->intr[PERF_MODE_INT],
+ h->firm_ver[3], (unsigned int)h->intr[h->intr_mode],
h->num_luns,
h->Qdepth, h->commands_outstanding,
h->maxQsinceinit, h->max_outstanding, h->maxSG);
@@ -636,6 +641,18 @@ static ssize_t host_store_rescan(struct device *dev,
}
static DEVICE_ATTR(rescan, S_IWUSR, NULL, host_store_rescan);
+static ssize_t host_show_transport_mode(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ctlr_info *h = to_hba(dev);
+
+ return snprintf(buf, 20, "%s\n",
+ h->transMethod & CFGTBL_Trans_Performant ?
+ "performant" : "simple");
+}
+static DEVICE_ATTR(transport_mode, S_IRUGO, host_show_transport_mode, NULL);
+
static ssize_t dev_show_unique_id(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -808,6 +825,7 @@ static DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL);
static struct attribute *cciss_host_attrs[] = {
&dev_attr_rescan.attr,
&dev_attr_resettable.attr,
+ &dev_attr_transport_mode.attr,
NULL
};
@@ -3984,6 +4002,9 @@ static void __devinit cciss_put_controller_into_performant_mode(ctlr_info_t *h)
{
__u32 trans_support;
+ if (cciss_simple_mode)
+ return;
+
dev_dbg(&h->pdev->dev, "Trying to put board into Performant mode\n");
/* Attempt to put controller into performant mode if supported */
/* Does board support performant mode? */
@@ -4081,7 +4102,7 @@ static void __devinit cciss_interrupt_mode(ctlr_info_t *h)
default_int_mode:
#endif /* CONFIG_PCI_MSI */
/* if we get here we're going to use the default interrupt mode */
- h->intr[PERF_MODE_INT] = h->pdev->irq;
+ h->intr[h->intr_mode] = h->pdev->irq;
return;
}
@@ -4341,6 +4362,9 @@ static int __devinit cciss_pci_init(ctlr_info_t *h)
}
cciss_enable_scsi_prefetch(h);
cciss_p600_dma_prefetch_quirk(h);
+ err = cciss_enter_simple_mode(h);
+ if (err)
+ goto err_out_free_res;
cciss_put_controller_into_performant_mode(h);
return 0;
@@ -4533,6 +4557,13 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev,
pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
pmcsr |= PCI_D0;
pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+ /*
+ * The P600 requires a small delay when changing states.
+ * Otherwise we may think the board did not reset and we bail.
+ * This for kdump only and is particular to the P600.
+ */
+ msleep(500);
}
return 0;
}
@@ -4843,20 +4874,20 @@ static int cciss_request_irq(ctlr_info_t *h,
irqreturn_t (*intxhandler)(int, void *))
{
if (h->msix_vector || h->msi_vector) {
- if (!request_irq(h->intr[PERF_MODE_INT], msixhandler,
+ if (!request_irq(h->intr[h->intr_mode], msixhandler,
IRQF_DISABLED, h->devname, h))
return 0;
dev_err(&h->pdev->dev, "Unable to get msi irq %d"
- " for %s\n", h->intr[PERF_MODE_INT],
+ " for %s\n", h->intr[h->intr_mode],
h->devname);
return -1;
}
- if (!request_irq(h->intr[PERF_MODE_INT], intxhandler,
+ if (!request_irq(h->intr[h->intr_mode], intxhandler,
IRQF_DISABLED, h->devname, h))
return 0;
dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
- h->intr[PERF_MODE_INT], h->devname);
+ h->intr[h->intr_mode], h->devname);
return -1;
}
@@ -4887,7 +4918,7 @@ static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h)
{
int ctlr = h->ctlr;
- free_irq(h->intr[PERF_MODE_INT], h);
+ free_irq(h->intr[h->intr_mode], h);
#ifdef CONFIG_PCI_MSI
if (h->msix_vector)
pci_disable_msix(h->pdev);
@@ -4953,6 +4984,7 @@ reinit_after_soft_reset:
h = hba[i];
h->pdev = pdev;
h->busy_initializing = 1;
+ h->intr_mode = cciss_simple_mode ? SIMPLE_MODE_INT : PERF_MODE_INT;
INIT_LIST_HEAD(&h->cmpQ);
INIT_LIST_HEAD(&h->reqQ);
mutex_init(&h->busy_shutting_down);
@@ -5009,7 +5041,7 @@ reinit_after_soft_reset:
dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
h->devname, pdev->device, pci_name(pdev),
- h->intr[PERF_MODE_INT], dac ? "" : " not");
+ h->intr[h->intr_mode], dac ? "" : " not");
if (cciss_allocate_cmd_pool(h))
goto clean4;
@@ -5056,7 +5088,7 @@ reinit_after_soft_reset:
spin_lock_irqsave(&h->lock, flags);
h->access.set_intr_mask(h, CCISS_INTR_OFF);
spin_unlock_irqrestore(&h->lock, flags);
- free_irq(h->intr[PERF_MODE_INT], h);
+ free_irq(h->intr[h->intr_mode], h);
rc = cciss_request_irq(h, cciss_msix_discard_completions,
cciss_intx_discard_completions);
if (rc) {
@@ -5133,7 +5165,7 @@ clean4:
cciss_free_cmd_pool(h);
cciss_free_scatterlists(h);
cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
- free_irq(h->intr[PERF_MODE_INT], h);
+ free_irq(h->intr[h->intr_mode], h);
clean2:
unregister_blkdev(h->major, h->devname);
clean1:
@@ -5172,9 +5204,31 @@ static void cciss_shutdown(struct pci_dev *pdev)
if (return_code != IO_OK)
dev_warn(&h->pdev->dev, "Error flushing cache\n");
h->access.set_intr_mask(h, CCISS_INTR_OFF);
- free_irq(h->intr[PERF_MODE_INT], h);
+ free_irq(h->intr[h->intr_mode], h);
+}
+
+static int __devinit cciss_enter_simple_mode(struct ctlr_info *h)
+{
+ u32 trans_support;
+
+ trans_support = readl(&(h->cfgtable->TransportSupport));
+ if (!(trans_support & SIMPLE_MODE))
+ return -ENOTSUPP;
+
+ h->max_commands = readl(&(h->cfgtable->CmdsOutMax));
+ writel(CFGTBL_Trans_Simple, &(h->cfgtable->HostWrite.TransportRequest));
+ writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
+ cciss_wait_for_mode_change_ack(h);
+ print_cfg_table(h);
+ if (!(readl(&(h->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) {
+ dev_warn(&h->pdev->dev, "unable to get board into simple mode\n");
+ return -ENODEV;
+ }
+ h->transMethod = CFGTBL_Trans_Simple;
+ return 0;
}
+
static void __devexit cciss_remove_one(struct pci_dev *pdev)
{
ctlr_info_t *h;
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index c049548e68b7..7fda30e4a241 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -92,6 +92,7 @@ struct ctlr_info
unsigned int intr[4];
unsigned int msix_vector;
unsigned int msi_vector;
+ int intr_mode;
int cciss_max_sectors;
BYTE cciss_read;
BYTE cciss_write;
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index b2fceb53e809..9125bbeacd4d 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -620,6 +620,7 @@ static int cpqarray_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
}
vendor_id = pdev->vendor;
device_id = pdev->device;
+ revision = pdev->revision;
irq = pdev->irq;
for(i=0; i<6; i++)
@@ -632,7 +633,6 @@ static int cpqarray_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
}
pci_read_config_word(pdev, PCI_COMMAND, &command);
- pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision);
pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_line_size);
pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &latency_timer);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 1706d60b8c99..9cf20355ceec 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1506,7 +1506,7 @@ extern void drbd_free_mdev(struct drbd_conf *mdev);
extern int proc_details;
/* drbd_req */
-extern int drbd_make_request(struct request_queue *q, struct bio *bio);
+extern void drbd_make_request(struct request_queue *q, struct bio *bio);
extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req);
extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec);
extern int is_valid_ar_handle(struct drbd_request *, sector_t);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 3424d675b769..4a0f314086e5 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1073,7 +1073,7 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write)
return 0;
}
-int drbd_make_request(struct request_queue *q, struct bio *bio)
+void drbd_make_request(struct request_queue *q, struct bio *bio)
{
unsigned int s_enr, e_enr;
struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
@@ -1081,7 +1081,7 @@ int drbd_make_request(struct request_queue *q, struct bio *bio)
if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) {
bio_endio(bio, -EPERM);
- return 0;
+ return;
}
start_time = jiffies;
@@ -1100,7 +1100,8 @@ int drbd_make_request(struct request_queue *q, struct bio *bio)
if (likely(s_enr == e_enr)) {
inc_ap_bio(mdev, 1);
- return drbd_make_request_common(mdev, bio, start_time);
+ drbd_make_request_common(mdev, bio, start_time);
+ return;
}
/* can this bio be split generically?
@@ -1148,7 +1149,6 @@ int drbd_make_request(struct request_queue *q, struct bio *bio)
bio_pair_release(bp);
}
- return 0;
}
/* This is called by bio_add_page(). With this function we reduce
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 4720c7ade0ae..3d806820280e 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -76,6 +76,8 @@
#include <linux/splice.h>
#include <linux/sysfs.h>
#include <linux/miscdevice.h>
+#include <linux/falloc.h>
+
#include <asm/uaccess.h>
static DEFINE_IDR(loop_index_idr);
@@ -203,74 +205,6 @@ lo_do_transfer(struct loop_device *lo, int cmd,
}
/**
- * do_lo_send_aops - helper for writing data to a loop device
- *
- * This is the fast version for backing filesystems which implement the address
- * space operations write_begin and write_end.
- */
-static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
- loff_t pos, struct page *unused)
-{
- struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
- struct address_space *mapping = file->f_mapping;
- pgoff_t index;
- unsigned offset, bv_offs;
- int len, ret;
-
- mutex_lock(&mapping->host->i_mutex);
- index = pos >> PAGE_CACHE_SHIFT;
- offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
- bv_offs = bvec->bv_offset;
- len = bvec->bv_len;
- while (len > 0) {
- sector_t IV;
- unsigned size, copied;
- int transfer_result;
- struct page *page;
- void *fsdata;
-
- IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
- size = PAGE_CACHE_SIZE - offset;
- if (size > len)
- size = len;
-
- ret = pagecache_write_begin(file, mapping, pos, size, 0,
- &page, &fsdata);
- if (ret)
- goto fail;
-
- file_update_time(file);
-
- transfer_result = lo_do_transfer(lo, WRITE, page, offset,
- bvec->bv_page, bv_offs, size, IV);
- copied = size;
- if (unlikely(transfer_result))
- copied = 0;
-
- ret = pagecache_write_end(file, mapping, pos, size, copied,
- page, fsdata);
- if (ret < 0 || ret != copied)
- goto fail;
-
- if (unlikely(transfer_result))
- goto fail;
-
- bv_offs += copied;
- len -= copied;
- offset = 0;
- index++;
- pos += copied;
- }
- ret = 0;
-out:
- mutex_unlock(&mapping->host->i_mutex);
- return ret;
-fail:
- ret = -1;
- goto out;
-}
-
-/**
* __do_lo_send_write - helper for writing data to a loop device
*
* This helper just factors out common code between do_lo_send_direct_write()
@@ -297,10 +231,8 @@ static int __do_lo_send_write(struct file *file,
/**
* do_lo_send_direct_write - helper for writing data to a loop device
*
- * This is the fast, non-transforming version for backing filesystems which do
- * not implement the address space operations write_begin and write_end.
- * It uses the write file operation which should be present on all writeable
- * filesystems.
+ * This is the fast, non-transforming version that does not need double
+ * buffering.
*/
static int do_lo_send_direct_write(struct loop_device *lo,
struct bio_vec *bvec, loff_t pos, struct page *page)
@@ -316,15 +248,9 @@ static int do_lo_send_direct_write(struct loop_device *lo,
/**
* do_lo_send_write - helper for writing data to a loop device
*
- * This is the slow, transforming version for filesystems which do not
- * implement the address space operations write_begin and write_end. It
- * uses the write file operation which should be present on all writeable
- * filesystems.
- *
- * Using fops->write is slower than using aops->{prepare,commit}_write in the
- * transforming case because we need to double buffer the data as we cannot do
- * the transformations in place as we do not have direct access to the
- * destination pages of the backing file.
+ * This is the slow, transforming version that needs to double buffer the
+ * data as it cannot do the transformations in place without having direct
+ * access to the destination pages of the backing file.
*/
static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
loff_t pos, struct page *page)
@@ -350,17 +276,16 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
struct page *page = NULL;
int i, ret = 0;
- do_lo_send = do_lo_send_aops;
- if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) {
+ if (lo->transfer != transfer_none) {
+ page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
+ if (unlikely(!page))
+ goto fail;
+ kmap(page);
+ do_lo_send = do_lo_send_write;
+ } else {
do_lo_send = do_lo_send_direct_write;
- if (lo->transfer != transfer_none) {
- page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
- if (unlikely(!page))
- goto fail;
- kmap(page);
- do_lo_send = do_lo_send_write;
- }
}
+
bio_for_each_segment(bvec, bio, i) {
ret = do_lo_send(lo, bvec, pos, page);
if (ret < 0)
@@ -484,6 +409,29 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
}
}
+ /*
+ * We use punch hole to reclaim the free space used by the
+ * image a.k.a. discard. However we do support discard if
+ * encryption is enabled, because it may give an attacker
+ * useful information.
+ */
+ if (bio->bi_rw & REQ_DISCARD) {
+ struct file *file = lo->lo_backing_file;
+ int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+
+ if ((!file->f_op->fallocate) ||
+ lo->lo_encrypt_key_size) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+ ret = file->f_op->fallocate(file, mode, pos,
+ bio->bi_size);
+ if (unlikely(ret && ret != -EINVAL &&
+ ret != -EOPNOTSUPP))
+ ret = -EIO;
+ goto out;
+ }
+
ret = lo_send(lo, bio, pos);
if ((bio->bi_rw & REQ_FUA) && !ret) {
@@ -514,7 +462,7 @@ static struct bio *loop_get_bio(struct loop_device *lo)
return bio_list_pop(&lo->lo_bio_list);
}
-static int loop_make_request(struct request_queue *q, struct bio *old_bio)
+static void loop_make_request(struct request_queue *q, struct bio *old_bio)
{
struct loop_device *lo = q->queuedata;
int rw = bio_rw(old_bio);
@@ -532,12 +480,11 @@ static int loop_make_request(struct request_queue *q, struct bio *old_bio)
loop_add_bio(lo, old_bio);
wake_up(&lo->lo_event);
spin_unlock_irq(&lo->lo_lock);
- return 0;
+ return;
out:
spin_unlock_irq(&lo->lo_lock);
bio_io_error(old_bio);
- return 0;
}
struct switch_request {
@@ -700,7 +647,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
goto out_putf;
fput(old_file);
- if (max_part > 0)
+ if (lo->lo_flags & LO_FLAGS_PARTSCAN)
ioctl_by_bdev(bdev, BLKRRPART, 0);
return 0;
@@ -777,16 +724,25 @@ static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
return sprintf(buf, "%s\n", autoclear ? "1" : "0");
}
+static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
+{
+ int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
+
+ return sprintf(buf, "%s\n", partscan ? "1" : "0");
+}
+
LOOP_ATTR_RO(backing_file);
LOOP_ATTR_RO(offset);
LOOP_ATTR_RO(sizelimit);
LOOP_ATTR_RO(autoclear);
+LOOP_ATTR_RO(partscan);
static struct attribute *loop_attrs[] = {
&loop_attr_backing_file.attr,
&loop_attr_offset.attr,
&loop_attr_sizelimit.attr,
&loop_attr_autoclear.attr,
+ &loop_attr_partscan.attr,
NULL,
};
@@ -807,6 +763,35 @@ static void loop_sysfs_exit(struct loop_device *lo)
&loop_attribute_group);
}
+static void loop_config_discard(struct loop_device *lo)
+{
+ struct file *file = lo->lo_backing_file;
+ struct inode *inode = file->f_mapping->host;
+ struct request_queue *q = lo->lo_queue;
+
+ /*
+ * We use punch hole to reclaim the free space used by the
+ * image a.k.a. discard. However we do support discard if
+ * encryption is enabled, because it may give an attacker
+ * useful information.
+ */
+ if ((!file->f_op->fallocate) ||
+ lo->lo_encrypt_key_size) {
+ q->limits.discard_granularity = 0;
+ q->limits.discard_alignment = 0;
+ q->limits.max_discard_sectors = 0;
+ q->limits.discard_zeroes_data = 0;
+ queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
+ return;
+ }
+
+ q->limits.discard_granularity = inode->i_sb->s_blocksize;
+ q->limits.discard_alignment = inode->i_sb->s_blocksize;
+ q->limits.max_discard_sectors = UINT_MAX >> 9;
+ q->limits.discard_zeroes_data = 1;
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+}
+
static int loop_set_fd(struct loop_device *lo, fmode_t mode,
struct block_device *bdev, unsigned int arg)
{
@@ -849,35 +834,23 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
mapping = file->f_mapping;
inode = mapping->host;
- if (!(file->f_mode & FMODE_WRITE))
- lo_flags |= LO_FLAGS_READ_ONLY;
-
error = -EINVAL;
- if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
- const struct address_space_operations *aops = mapping->a_ops;
-
- if (aops->write_begin)
- lo_flags |= LO_FLAGS_USE_AOPS;
- if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
- lo_flags |= LO_FLAGS_READ_ONLY;
+ if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
+ goto out_putf;
- lo_blocksize = S_ISBLK(inode->i_mode) ?
- inode->i_bdev->bd_block_size : PAGE_SIZE;
+ if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
+ !file->f_op->write)
+ lo_flags |= LO_FLAGS_READ_ONLY;
- error = 0;
- } else {
- goto out_putf;
- }
+ lo_blocksize = S_ISBLK(inode->i_mode) ?
+ inode->i_bdev->bd_block_size : PAGE_SIZE;
+ error = -EFBIG;
size = get_loop_size(lo, file);
-
- if ((loff_t)(sector_t)size != size) {
- error = -EFBIG;
+ if ((loff_t)(sector_t)size != size)
goto out_putf;
- }
- if (!(mode & FMODE_WRITE))
- lo_flags |= LO_FLAGS_READ_ONLY;
+ error = 0;
set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
@@ -919,7 +892,9 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
}
lo->lo_state = Lo_bound;
wake_up_process(lo->lo_thread);
- if (max_part > 0)
+ if (part_shift)
+ lo->lo_flags |= LO_FLAGS_PARTSCAN;
+ if (lo->lo_flags & LO_FLAGS_PARTSCAN)
ioctl_by_bdev(bdev, BLKRRPART, 0);
return 0;
@@ -980,10 +955,11 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
return err;
}
-static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
+static int loop_clr_fd(struct loop_device *lo)
{
struct file *filp = lo->lo_backing_file;
gfp_t gfp = lo->old_gfp_mask;
+ struct block_device *bdev = lo->lo_device;
if (lo->lo_state != Lo_bound)
return -ENXIO;
@@ -1012,7 +988,6 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
lo->lo_offset = 0;
lo->lo_sizelimit = 0;
lo->lo_encrypt_key_size = 0;
- lo->lo_flags = 0;
lo->lo_thread = NULL;
memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
@@ -1030,8 +1005,11 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
lo->lo_state = Lo_unbound;
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
- if (max_part > 0 && bdev)
+ if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
ioctl_by_bdev(bdev, BLKRRPART, 0);
+ lo->lo_flags = 0;
+ if (!part_shift)
+ lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
mutex_unlock(&lo->lo_ctl_mutex);
/*
* Need not hold lo_ctl_mutex to fput backing file.
@@ -1085,6 +1063,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (figure_loop_size(lo))
return -EFBIG;
}
+ loop_config_discard(lo);
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
@@ -1100,6 +1079,13 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
(info->lo_flags & LO_FLAGS_AUTOCLEAR))
lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
+ if ((info->lo_flags & LO_FLAGS_PARTSCAN) &&
+ !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
+ lo->lo_flags |= LO_FLAGS_PARTSCAN;
+ lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
+ ioctl_by_bdev(lo->lo_device, BLKRRPART, 0);
+ }
+
lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
lo->lo_init[0] = info->lo_init[0];
lo->lo_init[1] = info->lo_init[1];
@@ -1293,7 +1279,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
break;
case LOOP_CLR_FD:
/* loop_clr_fd would have unlocked lo_ctl_mutex on success */
- err = loop_clr_fd(lo, bdev);
+ err = loop_clr_fd(lo);
if (!err)
goto out_unlocked;
break;
@@ -1513,7 +1499,7 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
* In autoclear mode, stop the loop thread
* and remove configuration after last close.
*/
- err = loop_clr_fd(lo, NULL);
+ err = loop_clr_fd(lo);
if (!err)
goto out_unlocked;
} else {
@@ -1635,6 +1621,27 @@ static int loop_add(struct loop_device **l, int i)
if (!disk)
goto out_free_queue;
+ /*
+ * Disable partition scanning by default. The in-kernel partition
+ * scanning can be requested individually per-device during its
+ * setup. Userspace can always add and remove partitions from all
+ * devices. The needed partition minors are allocated from the
+ * extended minor space, the main loop device numbers will continue
+ * to match the loop minors, regardless of the number of partitions
+ * used.
+ *
+ * If max_part is given, partition scanning is globally enabled for
+ * all loop devices. The minors for the main loop devices will be
+ * multiples of max_part.
+ *
+ * Note: Global-for-all-devices, set-only-at-init, read-only module
+ * parameteters like 'max_loop' and 'max_part' make things needlessly
+ * complicated, are too static, inflexible and may surprise
+ * userspace tools. Parameters like this in general should be avoided.
+ */
+ if (!part_shift)
+ disk->flags |= GENHD_FL_NO_PART_SCAN;
+ disk->flags |= GENHD_FL_EXT_DEVT;
mutex_init(&lo->lo_ctl_mutex);
lo->lo_number = i;
lo->lo_thread = NULL;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index f533f3375e24..c3f0ee16594d 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -127,8 +127,7 @@ static void sock_shutdown(struct nbd_device *lo, int lock)
if (lock)
mutex_lock(&lo->tx_lock);
if (lo->sock) {
- printk(KERN_WARNING "%s: shutting down socket\n",
- lo->disk->disk_name);
+ dev_warn(disk_to_dev(lo->disk), "shutting down socket\n");
kernel_sock_shutdown(lo->sock, SHUT_RDWR);
lo->sock = NULL;
}
@@ -158,8 +157,9 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
sigset_t blocked, oldset;
if (unlikely(!sock)) {
- printk(KERN_ERR "%s: Attempted %s on closed socket in sock_xmit\n",
- lo->disk->disk_name, (send ? "send" : "recv"));
+ dev_err(disk_to_dev(lo->disk),
+ "Attempted %s on closed socket in sock_xmit\n",
+ (send ? "send" : "recv"));
return -EINVAL;
}
@@ -250,8 +250,8 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
result = sock_xmit(lo, 1, &request, sizeof(request),
(nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
if (result <= 0) {
- printk(KERN_ERR "%s: Send control failed (result %d)\n",
- lo->disk->disk_name, result);
+ dev_err(disk_to_dev(lo->disk),
+ "Send control failed (result %d)\n", result);
goto error_out;
}
@@ -270,8 +270,9 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
lo->disk->disk_name, req, bvec->bv_len);
result = sock_send_bvec(lo, bvec, flags);
if (result <= 0) {
- printk(KERN_ERR "%s: Send data failed (result %d)\n",
- lo->disk->disk_name, result);
+ dev_err(disk_to_dev(lo->disk),
+ "Send data failed (result %d)\n",
+ result);
goto error_out;
}
}
@@ -328,14 +329,13 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
reply.magic = 0;
result = sock_xmit(lo, 0, &reply, sizeof(reply), MSG_WAITALL);
if (result <= 0) {
- printk(KERN_ERR "%s: Receive control failed (result %d)\n",
- lo->disk->disk_name, result);
+ dev_err(disk_to_dev(lo->disk),
+ "Receive control failed (result %d)\n", result);
goto harderror;
}
if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
- printk(KERN_ERR "%s: Wrong magic (0x%lx)\n",
- lo->disk->disk_name,
+ dev_err(disk_to_dev(lo->disk), "Wrong magic (0x%lx)\n",
(unsigned long)ntohl(reply.magic));
result = -EPROTO;
goto harderror;
@@ -347,15 +347,15 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
if (result != -ENOENT)
goto harderror;
- printk(KERN_ERR "%s: Unexpected reply (%p)\n",
- lo->disk->disk_name, reply.handle);
+ dev_err(disk_to_dev(lo->disk), "Unexpected reply (%p)\n",
+ reply.handle);
result = -EBADR;
goto harderror;
}
if (ntohl(reply.error)) {
- printk(KERN_ERR "%s: Other side returned error (%d)\n",
- lo->disk->disk_name, ntohl(reply.error));
+ dev_err(disk_to_dev(lo->disk), "Other side returned error (%d)\n",
+ ntohl(reply.error));
req->errors++;
return req;
}
@@ -369,8 +369,8 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
rq_for_each_segment(bvec, req, iter) {
result = sock_recv_bvec(lo, bvec);
if (result <= 0) {
- printk(KERN_ERR "%s: Receive data failed (result %d)\n",
- lo->disk->disk_name, result);
+ dev_err(disk_to_dev(lo->disk), "Receive data failed (result %d)\n",
+ result);
req->errors++;
return req;
}
@@ -405,10 +405,10 @@ static int nbd_do_it(struct nbd_device *lo)
BUG_ON(lo->magic != LO_MAGIC);
- lo->pid = current->pid;
- ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
+ lo->pid = task_pid_nr(current);
+ ret = device_create_file(disk_to_dev(lo->disk), &pid_attr);
if (ret) {
- printk(KERN_ERR "nbd: sysfs_create_file failed!");
+ dev_err(disk_to_dev(lo->disk), "device_create_file failed!\n");
lo->pid = 0;
return ret;
}
@@ -416,7 +416,7 @@ static int nbd_do_it(struct nbd_device *lo)
while ((req = nbd_read_stat(lo)) != NULL)
nbd_end_request(req);
- sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
+ device_remove_file(disk_to_dev(lo->disk), &pid_attr);
lo->pid = 0;
return 0;
}
@@ -457,8 +457,8 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
if (rq_data_dir(req) == WRITE) {
nbd_cmd(req) = NBD_CMD_WRITE;
if (lo->flags & NBD_READ_ONLY) {
- printk(KERN_ERR "%s: Write on read-only\n",
- lo->disk->disk_name);
+ dev_err(disk_to_dev(lo->disk),
+ "Write on read-only\n");
goto error_out;
}
}
@@ -468,16 +468,15 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
mutex_lock(&lo->tx_lock);
if (unlikely(!lo->sock)) {
mutex_unlock(&lo->tx_lock);
- printk(KERN_ERR "%s: Attempted send on closed socket\n",
- lo->disk->disk_name);
+ dev_err(disk_to_dev(lo->disk),
+ "Attempted send on closed socket\n");
goto error_out;
}
lo->active_req = req;
if (nbd_send_req(lo, req) != 0) {
- printk(KERN_ERR "%s: Request send failed\n",
- lo->disk->disk_name);
+ dev_err(disk_to_dev(lo->disk), "Request send failed\n");
req->errors++;
nbd_end_request(req);
} else {
@@ -549,8 +548,8 @@ static void do_nbd_request(struct request_queue *q)
BUG_ON(lo->magic != LO_MAGIC);
if (unlikely(!lo->sock)) {
- printk(KERN_ERR "%s: Attempted send on closed socket\n",
- lo->disk->disk_name);
+ dev_err(disk_to_dev(lo->disk),
+ "Attempted send on closed socket\n");
req->errors++;
nbd_end_request(req);
spin_lock_irq(q->queue_lock);
@@ -576,7 +575,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
case NBD_DISCONNECT: {
struct request sreq;
- printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
+ dev_info(disk_to_dev(lo->disk), "NBD_DISCONNECT\n");
blk_rq_init(NULL, &sreq);
sreq.cmd_type = REQ_TYPE_SPECIAL;
@@ -674,7 +673,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
file = lo->file;
lo->file = NULL;
nbd_clear_que(lo);
- printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name);
+ dev_warn(disk_to_dev(lo->disk), "queue cleared\n");
if (file)
fput(file);
lo->bytesize = 0;
@@ -694,8 +693,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
return 0;
case NBD_PRINT_DEBUG:
- printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n",
- bdev->bd_disk->disk_name,
+ dev_info(disk_to_dev(lo->disk),
+ "next = %p, prev = %p, head = %p\n",
lo->queue_head.next, lo->queue_head.prev,
&lo->queue_head);
return 0;
@@ -745,7 +744,7 @@ static int __init nbd_init(void)
BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
if (max_part < 0) {
- printk(KERN_CRIT "nbd: max_part must be >= 0\n");
+ printk(KERN_ERR "nbd: max_part must be >= 0\n");
return -EINVAL;
}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index e133f094ab08..a63b0a2b7805 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2444,7 +2444,7 @@ static void pkt_end_io_read_cloned(struct bio *bio, int err)
pkt_bio_finished(pd);
}
-static int pkt_make_request(struct request_queue *q, struct bio *bio)
+static void pkt_make_request(struct request_queue *q, struct bio *bio)
{
struct pktcdvd_device *pd;
char b[BDEVNAME_SIZE];
@@ -2473,7 +2473,7 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio)
cloned_bio->bi_end_io = pkt_end_io_read_cloned;
pd->stats.secs_r += bio->bi_size >> 9;
pkt_queue_bio(pd, cloned_bio);
- return 0;
+ return;
}
if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
@@ -2509,7 +2509,7 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio)
pkt_make_request(q, &bp->bio1);
pkt_make_request(q, &bp->bio2);
bio_pair_release(bp);
- return 0;
+ return;
}
}
@@ -2533,7 +2533,7 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio)
}
spin_unlock(&pkt->lock);
spin_unlock(&pd->cdrw.active_list_lock);
- return 0;
+ return;
} else {
blocked_bio = 1;
}
@@ -2584,10 +2584,9 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio)
*/
wake_up(&pd->wqueue);
}
- return 0;
+ return;
end_io:
bio_io_error(bio);
- return 0;
}
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index cbd735b931ea..f58cdcfb305f 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -597,7 +597,7 @@ out:
return next;
}
-static int ps3vram_make_request(struct request_queue *q, struct bio *bio)
+static void ps3vram_make_request(struct request_queue *q, struct bio *bio)
{
struct ps3_system_bus_device *dev = q->queuedata;
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
@@ -611,13 +611,11 @@ static int ps3vram_make_request(struct request_queue *q, struct bio *bio)
spin_unlock_irq(&priv->lock);
if (busy)
- return 0;
+ return;
do {
bio = ps3vram_do_bio(dev, bio);
} while (bio);
-
- return 0;
}
static int __devinit ps3vram_probe(struct ps3_system_bus_device *dev)
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 031ca720d926..aa2712060bfb 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -513,7 +513,7 @@ static void process_page(unsigned long data)
}
}
-static int mm_make_request(struct request_queue *q, struct bio *bio)
+static void mm_make_request(struct request_queue *q, struct bio *bio)
{
struct cardinfo *card = q->queuedata;
pr_debug("mm_make_request %llu %u\n",
@@ -525,7 +525,7 @@ static int mm_make_request(struct request_queue *q, struct bio *bio)
card->biotail = &bio->bi_next;
spin_unlock_irq(&card->lock);
- return 0;
+ return;
}
static irqreturn_t mm_interrupt(int irq, void *__card)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index aa7094f23017..4d0b70adf5f7 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -9,10 +9,13 @@
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <scsi/scsi_cmnd.h>
+#include <linux/idr.h>
#define PART_BITS 4
-static int major, index;
+static int major;
+static DEFINE_IDA(vd_index_ida);
+
struct workqueue_struct *virtblk_wq;
struct virtio_blk
@@ -36,6 +39,9 @@ struct virtio_blk
/* What host tells us, plus 2 for header & tailer. */
unsigned int sg_elems;
+ /* Ida index - used to track minor number allocations. */
+ int index;
+
/* Scatterlist: can be too big for stack. */
struct scatterlist sg[/*sg_elems*/];
};
@@ -277,6 +283,11 @@ static int index_to_minor(int index)
return index << PART_BITS;
}
+static int minor_to_index(int minor)
+{
+ return minor >> PART_BITS;
+}
+
static ssize_t virtblk_serial_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -342,14 +353,17 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
{
struct virtio_blk *vblk;
struct request_queue *q;
- int err;
+ int err, index;
u64 cap;
u32 v, blk_size, sg_elems, opt_io_size;
u16 min_io_size;
u8 physical_block_exp, alignment_offset;
- if (index_to_minor(index) >= 1 << MINORBITS)
- return -ENOSPC;
+ err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
+ GFP_KERNEL);
+ if (err < 0)
+ goto out;
+ index = err;
/* We need to know how many segments before we allocate. */
err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
@@ -366,7 +380,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
if (!vblk) {
err = -ENOMEM;
- goto out;
+ goto out_free_index;
}
INIT_LIST_HEAD(&vblk->reqs);
@@ -422,7 +436,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
vblk->disk->private_data = vblk;
vblk->disk->fops = &virtblk_fops;
vblk->disk->driverfs_dev = &vdev->dev;
- index++;
+ vblk->index = index;
/* configure queue flush support */
if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
@@ -517,6 +531,8 @@ out_free_vq:
vdev->config->del_vqs(vdev);
out_free_vblk:
kfree(vblk);
+out_free_index:
+ ida_simple_remove(&vd_index_ida, index);
out:
return err;
}
@@ -524,6 +540,7 @@ out:
static void __devexit virtblk_remove(struct virtio_device *vdev)
{
struct virtio_blk *vblk = vdev->priv;
+ int index = vblk->index;
flush_work(&vblk->config_work);
@@ -539,6 +556,7 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
mempool_destroy(vblk->pool);
vdev->config->del_vqs(vdev);
kfree(vblk);
+ ida_simple_remove(&vd_index_ida, index);
}
static const struct virtio_device_id id_table[] = {
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 1540792b1e54..15ec4db194d1 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -39,6 +39,9 @@
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/freezer.h>
+#include <linux/loop.h>
+#include <linux/falloc.h>
+#include <linux/fs.h>
#include <xen/events.h>
#include <xen/page.h>
@@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
static void print_stats(struct xen_blkif *blkif)
{
- pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n",
+ pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d"
+ " | ds %4d\n",
current->comm, blkif->st_oo_req,
- blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
+ blkif->st_rd_req, blkif->st_wr_req,
+ blkif->st_f_req, blkif->st_ds_req);
blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
blkif->st_rd_req = 0;
blkif->st_wr_req = 0;
blkif->st_oo_req = 0;
+ blkif->st_ds_req = 0;
}
int xen_blkif_schedule(void *arg)
@@ -410,6 +416,59 @@ static int xen_blkbk_map(struct blkif_request *req,
return ret;
}
+static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
+{
+ int err = 0;
+ int status = BLKIF_RSP_OKAY;
+ struct block_device *bdev = blkif->vbd.bdev;
+
+ if (blkif->blk_backend_type == BLKIF_BACKEND_PHY)
+ /* just forward the discard request */
+ err = blkdev_issue_discard(bdev,
+ req->u.discard.sector_number,
+ req->u.discard.nr_sectors,
+ GFP_KERNEL, 0);
+ else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
+ /* punch a hole in the backing file */
+ struct loop_device *lo = bdev->bd_disk->private_data;
+ struct file *file = lo->lo_backing_file;
+
+ if (file->f_op->fallocate)
+ err = file->f_op->fallocate(file,
+ FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+ req->u.discard.sector_number << 9,
+ req->u.discard.nr_sectors << 9);
+ else
+ err = -EOPNOTSUPP;
+ } else
+ err = -EOPNOTSUPP;
+
+ if (err == -EOPNOTSUPP) {
+ pr_debug(DRV_PFX "discard op failed, not supported\n");
+ status = BLKIF_RSP_EOPNOTSUPP;
+ } else if (err)
+ status = BLKIF_RSP_ERROR;
+
+ make_response(blkif, req->id, req->operation, status);
+}
+
+static void xen_blk_drain_io(struct xen_blkif *blkif)
+{
+ atomic_set(&blkif->drain, 1);
+ do {
+ /* The initial value is one, and one refcnt taken at the
+ * start of the xen_blkif_schedule thread. */
+ if (atomic_read(&blkif->refcnt) <= 2)
+ break;
+ wait_for_completion_interruptible_timeout(
+ &blkif->drain_complete, HZ);
+
+ if (!atomic_read(&blkif->drain))
+ break;
+ } while (!kthread_should_stop());
+ atomic_set(&blkif->drain, 0);
+}
+
/*
* Completion callback on the bio's. Called as bh->b_end_io()
*/
@@ -422,6 +481,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
+ } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
+ (error == -EOPNOTSUPP)) {
+ pr_debug(DRV_PFX "write barrier op failed, not supported\n");
+ xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
+ pending_req->status = BLKIF_RSP_EOPNOTSUPP;
} else if (error) {
pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
" error=%d\n", error);
@@ -438,6 +502,10 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
make_response(pending_req->blkif, pending_req->id,
pending_req->operation, pending_req->status);
xen_blkif_put(pending_req->blkif);
+ if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
+ if (atomic_read(&pending_req->blkif->drain))
+ complete(&pending_req->blkif->drain_complete);
+ }
free_req(pending_req);
}
}
@@ -532,7 +600,6 @@ do_block_io_op(struct xen_blkif *blkif)
return more_to_do;
}
-
/*
* Transmutation of the 'struct blkif_request' to a proper 'struct bio'
* and call the 'submit_bio' to pass it to the underlying storage.
@@ -549,6 +616,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
int i, nbio = 0;
int operation;
struct blk_plug plug;
+ bool drain = false;
switch (req->operation) {
case BLKIF_OP_READ:
@@ -559,11 +627,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
blkif->st_wr_req++;
operation = WRITE_ODIRECT;
break;
+ case BLKIF_OP_WRITE_BARRIER:
+ drain = true;
case BLKIF_OP_FLUSH_DISKCACHE:
blkif->st_f_req++;
operation = WRITE_FLUSH;
break;
- case BLKIF_OP_WRITE_BARRIER:
+ case BLKIF_OP_DISCARD:
+ blkif->st_ds_req++;
+ operation = REQ_DISCARD;
+ break;
default:
operation = 0; /* make gcc happy */
goto fail_response;
@@ -572,7 +645,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
/* Check that the number of segments is sane. */
nseg = req->nr_segments;
- if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
+ if (unlikely(nseg == 0 && operation != WRITE_FLUSH &&
+ operation != REQ_DISCARD) ||
unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
nseg);
@@ -621,16 +695,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
}
}
+ /* Wait on all outstanding I/O's and once that has been completed
+ * issue the WRITE_FLUSH.
+ */
+ if (drain)
+ xen_blk_drain_io(pending_req->blkif);
+
/*
* If we have failed at this point, we need to undo the M2P override,
* set gnttab_set_unmap_op on all of the grant references and perform
* the hypercall to unmap the grants - that is all done in
* xen_blkbk_unmap.
*/
- if (xen_blkbk_map(req, pending_req, seg))
+ if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg))
goto fail_flush;
- /* This corresponding xen_blkif_put is done in __end_block_io_op */
+ /*
+ * This corresponding xen_blkif_put is done in __end_block_io_op, or
+ * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
+ */
xen_blkif_get(blkif);
for (i = 0; i < nseg; i++) {
@@ -654,18 +737,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
preq.sector_number += seg[i].nsec;
}
- /* This will be hit if the operation was a flush. */
+ /* This will be hit if the operation was a flush or discard. */
if (!bio) {
- BUG_ON(operation != WRITE_FLUSH);
+ BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD);
- bio = bio_alloc(GFP_KERNEL, 0);
- if (unlikely(bio == NULL))
- goto fail_put_bio;
+ if (operation == WRITE_FLUSH) {
+ bio = bio_alloc(GFP_KERNEL, 0);
+ if (unlikely(bio == NULL))
+ goto fail_put_bio;
- biolist[nbio++] = bio;
- bio->bi_bdev = preq.bdev;
- bio->bi_private = pending_req;
- bio->bi_end_io = end_block_io_op;
+ biolist[nbio++] = bio;
+ bio->bi_bdev = preq.bdev;
+ bio->bi_private = pending_req;
+ bio->bi_end_io = end_block_io_op;
+ } else if (operation == REQ_DISCARD) {
+ xen_blk_discard(blkif, req);
+ xen_blkif_put(blkif);
+ free_req(pending_req);
+ return 0;
+ }
}
/*
@@ -685,7 +775,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
if (operation == READ)
blkif->st_rd_sect += preq.nr_sects;
- else if (operation == WRITE || operation == WRITE_FLUSH)
+ else if (operation & WRITE)
blkif->st_wr_sect += preq.nr_sects;
return 0;
@@ -765,9 +855,9 @@ static int __init xen_blkif_init(void)
mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
- blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) *
+ blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) *
xen_blkif_reqs, GFP_KERNEL);
- blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
+ blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
mmap_pages, GFP_KERNEL);
blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) *
mmap_pages, GFP_KERNEL);
@@ -790,8 +880,6 @@ static int __init xen_blkif_init(void)
if (rc)
goto failed_init;
- memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs));
-
INIT_LIST_HEAD(&blkbk->pending_free);
spin_lock_init(&blkbk->pending_free_lock);
init_waitqueue_head(&blkbk->pending_free_wq);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index c4bd34063ecc..dfb1b3a43a5d 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -62,13 +62,26 @@ struct blkif_common_response {
/* i386 protocol version */
#pragma pack(push, 4)
+
+struct blkif_x86_32_request_rw {
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+struct blkif_x86_32_request_discard {
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ uint64_t nr_sectors;
+};
+
struct blkif_x86_32_request {
uint8_t operation; /* BLKIF_OP_??? */
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
- blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
- struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ union {
+ struct blkif_x86_32_request_rw rw;
+ struct blkif_x86_32_request_discard discard;
+ } u;
};
struct blkif_x86_32_response {
uint64_t id; /* copied from request */
@@ -78,13 +91,26 @@ struct blkif_x86_32_response {
#pragma pack(pop)
/* x86_64 protocol version */
+
+struct blkif_x86_64_request_rw {
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+struct blkif_x86_64_request_discard {
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ uint64_t nr_sectors;
+};
+
struct blkif_x86_64_request {
uint8_t operation; /* BLKIF_OP_??? */
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t __attribute__((__aligned__(8))) id;
- blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
- struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ union {
+ struct blkif_x86_64_request_rw rw;
+ struct blkif_x86_64_request_discard discard;
+ } u;
};
struct blkif_x86_64_response {
uint64_t __attribute__((__aligned__(8))) id;
@@ -112,6 +138,11 @@ enum blkif_protocol {
BLKIF_PROTOCOL_X86_64 = 3,
};
+enum blkif_backend_type {
+ BLKIF_BACKEND_PHY = 1,
+ BLKIF_BACKEND_FILE = 2,
+};
+
struct xen_vbd {
/* What the domain refers to this vbd as. */
blkif_vdev_t handle;
@@ -137,8 +168,9 @@ struct xen_blkif {
unsigned int irq;
/* Comms information. */
enum blkif_protocol blk_protocol;
+ enum blkif_backend_type blk_backend_type;
union blkif_back_rings blk_rings;
- struct vm_struct *blk_ring_area;
+ void *blk_ring;
/* The VBD attached to this interface. */
struct xen_vbd vbd;
/* Back pointer to the backend_info. */
@@ -148,6 +180,9 @@ struct xen_blkif {
atomic_t refcnt;
wait_queue_head_t wq;
+ /* for barrier (drain) requests */
+ struct completion drain_complete;
+ atomic_t drain;
/* One thread per one blkif. */
struct task_struct *xenblkd;
unsigned int waiting_reqs;
@@ -158,13 +193,11 @@ struct xen_blkif {
int st_wr_req;
int st_oo_req;
int st_f_req;
+ int st_ds_req;
int st_rd_sect;
int st_wr_sect;
wait_queue_head_t waiting_to_free;
-
- grant_handle_t shmem_handle;
- grant_ref_t shmem_ref;
};
@@ -181,7 +214,7 @@ struct xen_blkif {
struct phys_req {
unsigned short dev;
- unsigned short nr_sects;
+ blkif_sector_t nr_sects;
struct block_device *bdev;
blkif_sector_t sector_number;
};
@@ -195,6 +228,8 @@ int xen_blkif_schedule(void *arg);
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
struct backend_info *be, int state);
+int xen_blkbk_barrier(struct xenbus_transaction xbt,
+ struct backend_info *be, int state);
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
static inline void blkif_get_x86_32_req(struct blkif_request *dst,
@@ -205,12 +240,25 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
dst->nr_segments = src->nr_segments;
dst->handle = src->handle;
dst->id = src->id;
- dst->u.rw.sector_number = src->sector_number;
- barrier();
- if (n > dst->nr_segments)
- n = dst->nr_segments;
- for (i = 0; i < n; i++)
- dst->u.rw.seg[i] = src->seg[i];
+ switch (src->operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ case BLKIF_OP_WRITE_BARRIER:
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ dst->u.rw.sector_number = src->u.rw.sector_number;
+ barrier();
+ if (n > dst->nr_segments)
+ n = dst->nr_segments;
+ for (i = 0; i < n; i++)
+ dst->u.rw.seg[i] = src->u.rw.seg[i];
+ break;
+ case BLKIF_OP_DISCARD:
+ dst->u.discard.sector_number = src->u.discard.sector_number;
+ dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+ break;
+ default:
+ break;
+ }
}
static inline void blkif_get_x86_64_req(struct blkif_request *dst,
@@ -221,12 +269,25 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
dst->nr_segments = src->nr_segments;
dst->handle = src->handle;
dst->id = src->id;
- dst->u.rw.sector_number = src->sector_number;
- barrier();
- if (n > dst->nr_segments)
- n = dst->nr_segments;
- for (i = 0; i < n; i++)
- dst->u.rw.seg[i] = src->seg[i];
+ switch (src->operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ case BLKIF_OP_WRITE_BARRIER:
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ dst->u.rw.sector_number = src->u.rw.sector_number;
+ barrier();
+ if (n > dst->nr_segments)
+ n = dst->nr_segments;
+ for (i = 0; i < n; i++)
+ dst->u.rw.seg[i] = src->u.rw.seg[i];
+ break;
+ case BLKIF_OP_DISCARD:
+ dst->u.discard.sector_number = src->u.discard.sector_number;
+ dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+ break;
+ default:
+ break;
+ }
}
#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 5fd2010f7d2b..f759ad4584c3 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -114,44 +114,14 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
spin_lock_init(&blkif->blk_ring_lock);
atomic_set(&blkif->refcnt, 1);
init_waitqueue_head(&blkif->wq);
+ init_completion(&blkif->drain_complete);
+ atomic_set(&blkif->drain, 0);
blkif->st_print = jiffies;
init_waitqueue_head(&blkif->waiting_to_free);
return blkif;
}
-static int map_frontend_page(struct xen_blkif *blkif, unsigned long shared_page)
-{
- struct gnttab_map_grant_ref op;
-
- gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
- GNTMAP_host_map, shared_page, blkif->domid);
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
- BUG();
-
- if (op.status) {
- DPRINTK("Grant table operation failure !\n");
- return op.status;
- }
-
- blkif->shmem_ref = shared_page;
- blkif->shmem_handle = op.handle;
-
- return 0;
-}
-
-static void unmap_frontend_page(struct xen_blkif *blkif)
-{
- struct gnttab_unmap_grant_ref op;
-
- gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
- GNTMAP_host_map, blkif->shmem_handle);
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
- BUG();
-}
-
static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
unsigned int evtchn)
{
@@ -161,35 +131,29 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
if (blkif->irq)
return 0;
- blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE);
- if (!blkif->blk_ring_area)
- return -ENOMEM;
-
- err = map_frontend_page(blkif, shared_page);
- if (err) {
- free_vm_area(blkif->blk_ring_area);
+ err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring);
+ if (err < 0)
return err;
- }
switch (blkif->blk_protocol) {
case BLKIF_PROTOCOL_NATIVE:
{
struct blkif_sring *sring;
- sring = (struct blkif_sring *)blkif->blk_ring_area->addr;
+ sring = (struct blkif_sring *)blkif->blk_ring;
BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
break;
}
case BLKIF_PROTOCOL_X86_32:
{
struct blkif_x86_32_sring *sring_x86_32;
- sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr;
+ sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
break;
}
case BLKIF_PROTOCOL_X86_64:
{
struct blkif_x86_64_sring *sring_x86_64;
- sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr;
+ sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
break;
}
@@ -201,8 +165,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
xen_blkif_be_int, 0,
"blkif-backend", blkif);
if (err < 0) {
- unmap_frontend_page(blkif);
- free_vm_area(blkif->blk_ring_area);
+ xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
blkif->blk_rings.common.sring = NULL;
return err;
}
@@ -228,8 +191,7 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
}
if (blkif->blk_rings.common.sring) {
- unmap_frontend_page(blkif);
- free_vm_area(blkif->blk_ring_area);
+ xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
blkif->blk_rings.common.sring = NULL;
}
}
@@ -272,6 +234,7 @@ VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req);
+VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req);
VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
@@ -280,6 +243,7 @@ static struct attribute *xen_vbdstat_attrs[] = {
&dev_attr_rd_req.attr,
&dev_attr_wr_req.attr,
&dev_attr_f_req.attr,
+ &dev_attr_ds_req.attr,
&dev_attr_rd_sect.attr,
&dev_attr_wr_sect.attr,
NULL
@@ -419,6 +383,73 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
return err;
}
+int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
+{
+ struct xenbus_device *dev = be->dev;
+ struct xen_blkif *blkif = be->blkif;
+ char *type;
+ int err;
+ int state = 0;
+
+ type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
+ if (!IS_ERR(type)) {
+ if (strncmp(type, "file", 4) == 0) {
+ state = 1;
+ blkif->blk_backend_type = BLKIF_BACKEND_FILE;
+ }
+ if (strncmp(type, "phy", 3) == 0) {
+ struct block_device *bdev = be->blkif->vbd.bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ if (blk_queue_discard(q)) {
+ err = xenbus_printf(xbt, dev->nodename,
+ "discard-granularity", "%u",
+ q->limits.discard_granularity);
+ if (err) {
+ xenbus_dev_fatal(dev, err,
+ "writing discard-granularity");
+ goto kfree;
+ }
+ err = xenbus_printf(xbt, dev->nodename,
+ "discard-alignment", "%u",
+ q->limits.discard_alignment);
+ if (err) {
+ xenbus_dev_fatal(dev, err,
+ "writing discard-alignment");
+ goto kfree;
+ }
+ state = 1;
+ blkif->blk_backend_type = BLKIF_BACKEND_PHY;
+ }
+ }
+ } else {
+ err = PTR_ERR(type);
+ xenbus_dev_fatal(dev, err, "reading type");
+ goto out;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-discard",
+ "%d", state);
+ if (err)
+ xenbus_dev_fatal(dev, err, "writing feature-discard");
+kfree:
+ kfree(type);
+out:
+ return err;
+}
+int xen_blkbk_barrier(struct xenbus_transaction xbt,
+ struct backend_info *be, int state)
+{
+ struct xenbus_device *dev = be->dev;
+ int err;
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
+ "%d", state);
+ if (err)
+ xenbus_dev_fatal(dev, err, "writing feature-barrier");
+
+ return err;
+}
+
/*
* Entry point to this code when a new device is created. Allocate the basic
* structures, and watch the store waiting for the hotplug scripts to tell us
@@ -650,6 +681,11 @@ again:
if (err)
goto abort;
+ err = xen_blkbk_discard(xbt, be);
+
+ /* If we can't advertise it is OK. */
+ err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
+
err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
(unsigned long long)vbd_sz(&be->blkif->vbd));
if (err) {
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 9ea8c2576c70..7b2ec5908413 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -98,6 +98,9 @@ struct blkfront_info
unsigned long shadow_free;
unsigned int feature_flush;
unsigned int flush_op;
+ unsigned int feature_discard;
+ unsigned int discard_granularity;
+ unsigned int discard_alignment;
int is_ready;
};
@@ -302,29 +305,36 @@ static int blkif_queue_request(struct request *req)
ring_req->operation = info->flush_op;
}
- ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
- BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ if (unlikely(req->cmd_flags & REQ_DISCARD)) {
+ /* id, sector_number and handle are set above. */
+ ring_req->operation = BLKIF_OP_DISCARD;
+ ring_req->nr_segments = 0;
+ ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
+ } else {
+ ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
+ BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
- for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
- buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
- fsect = sg->offset >> 9;
- lsect = fsect + (sg->length >> 9) - 1;
- /* install a grant reference. */
- ref = gnttab_claim_grant_reference(&gref_head);
- BUG_ON(ref == -ENOSPC);
+ for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
+ buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
+ fsect = sg->offset >> 9;
+ lsect = fsect + (sg->length >> 9) - 1;
+ /* install a grant reference. */
+ ref = gnttab_claim_grant_reference(&gref_head);
+ BUG_ON(ref == -ENOSPC);
- gnttab_grant_foreign_access_ref(
- ref,
- info->xbdev->otherend_id,
- buffer_mfn,
- rq_data_dir(req) );
-
- info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
- ring_req->u.rw.seg[i] =
- (struct blkif_request_segment) {
- .gref = ref,
- .first_sect = fsect,
- .last_sect = lsect };
+ gnttab_grant_foreign_access_ref(
+ ref,
+ info->xbdev->otherend_id,
+ buffer_mfn,
+ rq_data_dir(req));
+
+ info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
+ ring_req->u.rw.seg[i] =
+ (struct blkif_request_segment) {
+ .gref = ref,
+ .first_sect = fsect,
+ .last_sect = lsect };
+ }
}
info->ring.req_prod_pvt++;
@@ -370,7 +380,9 @@ static void do_blkif_request(struct request_queue *rq)
blk_start_request(req);
- if (req->cmd_type != REQ_TYPE_FS) {
+ if ((req->cmd_type != REQ_TYPE_FS) ||
+ ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
+ !info->flush_op)) {
__blk_end_request_all(req, -EIO);
continue;
}
@@ -399,6 +411,7 @@ wait:
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
{
struct request_queue *rq;
+ struct blkfront_info *info = gd->private_data;
rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
if (rq == NULL)
@@ -406,6 +419,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
+ if (info->feature_discard) {
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
+ blk_queue_max_discard_sectors(rq, get_capacity(gd));
+ rq->limits.discard_granularity = info->discard_granularity;
+ rq->limits.discard_alignment = info->discard_alignment;
+ }
+
/* Hard sector size and max sectors impersonate the equiv. hardware. */
blk_queue_logical_block_size(rq, sector_size);
blk_queue_max_hw_sectors(rq, 512);
@@ -722,6 +742,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
switch (bret->operation) {
+ case BLKIF_OP_DISCARD:
+ if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+ struct request_queue *rq = info->rq;
+ printk(KERN_WARNING "blkfront: %s: discard op failed\n",
+ info->gd->disk_name);
+ error = -EOPNOTSUPP;
+ info->feature_discard = 0;
+ queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
+ }
+ __blk_end_request_all(req, error);
+ break;
case BLKIF_OP_FLUSH_DISKCACHE:
case BLKIF_OP_WRITE_BARRIER:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
@@ -1098,6 +1129,33 @@ blkfront_closing(struct blkfront_info *info)
bdput(bdev);
}
+static void blkfront_setup_discard(struct blkfront_info *info)
+{
+ int err;
+ char *type;
+ unsigned int discard_granularity;
+ unsigned int discard_alignment;
+
+ type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
+ if (IS_ERR(type))
+ return;
+
+ if (strncmp(type, "phy", 3) == 0) {
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "discard-granularity", "%u", &discard_granularity,
+ "discard-alignment", "%u", &discard_alignment,
+ NULL);
+ if (!err) {
+ info->feature_discard = 1;
+ info->discard_granularity = discard_granularity;
+ info->discard_alignment = discard_alignment;
+ }
+ } else if (strncmp(type, "file", 4) == 0)
+ info->feature_discard = 1;
+
+ kfree(type);
+}
+
/*
* Invoked when the backend is finally 'ready' (and has told produced
* the details about the physical device - #sectors, size, etc).
@@ -1108,7 +1166,7 @@ static void blkfront_connect(struct blkfront_info *info)
unsigned long sector_size;
unsigned int binfo;
int err;
- int barrier, flush;
+ int barrier, flush, discard;
switch (info->connected) {
case BLKIF_STATE_CONNECTED:
@@ -1178,7 +1236,14 @@ static void blkfront_connect(struct blkfront_info *info)
info->feature_flush = REQ_FLUSH;
info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
}
-
+
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "feature-discard", "%d", &discard,
+ NULL);
+
+ if (!err && discard)
+ blkfront_setup_discard(info);
+
err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
@@ -1385,6 +1450,8 @@ static struct xenbus_driver blkfront = {
static int __init xlblk_init(void)
{
+ int ret;
+
if (!xen_domain())
return -ENODEV;
@@ -1394,7 +1461,13 @@ static int __init xlblk_init(void)
return -ENODEV;
}
- return xenbus_register_frontend(&blkfront);
+ ret = xenbus_register_frontend(&blkfront);
+ if (ret) {
+ unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
+ return ret;
+ }
+
+ return 0;
}
module_init(xlblk_init);