summaryrefslogtreecommitdiff
path: root/drivers/block/xen-blkfront.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/xen-blkfront.c')
-rw-r--r--drivers/block/xen-blkfront.c199
1 files changed, 171 insertions, 28 deletions
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 007db8986e84..96e9b00db081 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -44,6 +44,7 @@
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/bitmap.h>
+#include <linux/llist.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
@@ -64,10 +65,17 @@ enum blkif_state {
BLKIF_STATE_SUSPENDED,
};
+struct grant {
+ grant_ref_t gref;
+ unsigned long pfn;
+ struct llist_node node;
+};
+
struct blk_shadow {
struct blkif_request req;
struct request *request;
unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
static DEFINE_MUTEX(blkfront_mutex);
@@ -97,6 +105,8 @@ struct blkfront_info
struct work_struct work;
struct gnttab_free_callback callback;
struct blk_shadow shadow[BLK_RING_SIZE];
+ struct llist_head persistent_gnts;
+ unsigned int persistent_gnts_c;
unsigned long shadow_free;
unsigned int feature_flush;
unsigned int flush_op;
@@ -104,6 +114,7 @@ struct blkfront_info
unsigned int feature_secdiscard:1;
unsigned int discard_granularity;
unsigned int discard_alignment;
+ unsigned int feature_persistent:1;
int is_ready;
};
@@ -287,21 +298,36 @@ static int blkif_queue_request(struct request *req)
unsigned long id;
unsigned int fsect, lsect;
int i, ref;
+
+ /*
+ * Used to store if we are able to queue the request by just using
+ * existing persistent grants, or if we have to get new grants,
+ * as there are not sufficiently many free.
+ */
+ bool new_persistent_gnts;
grant_ref_t gref_head;
+ struct page *granted_page;
+ struct grant *gnt_list_entry = NULL;
struct scatterlist *sg;
if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
return 1;
- if (gnttab_alloc_grant_references(
- BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
- gnttab_request_free_callback(
- &info->callback,
- blkif_restart_queue_callback,
- info,
- BLKIF_MAX_SEGMENTS_PER_REQUEST);
- return 1;
- }
+ /* Check if we have enought grants to allocate a requests */
+ if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+ new_persistent_gnts = 1;
+ if (gnttab_alloc_grant_references(
+ BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
+ &gref_head) < 0) {
+ gnttab_request_free_callback(
+ &info->callback,
+ blkif_restart_queue_callback,
+ info,
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ return 1;
+ }
+ } else
+ new_persistent_gnts = 0;
/* Fill out a communications ring structure. */
ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
@@ -341,18 +367,73 @@ static int blkif_queue_request(struct request *req)
BLKIF_MAX_SEGMENTS_PER_REQUEST);
for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
- buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
fsect = sg->offset >> 9;
lsect = fsect + (sg->length >> 9) - 1;
- /* install a grant reference. */
- ref = gnttab_claim_grant_reference(&gref_head);
- BUG_ON(ref == -ENOSPC);
- gnttab_grant_foreign_access_ref(
- ref,
+ if (info->persistent_gnts_c) {
+ BUG_ON(llist_empty(&info->persistent_gnts));
+ gnt_list_entry = llist_entry(
+ llist_del_first(&info->persistent_gnts),
+ struct grant, node);
+
+ ref = gnt_list_entry->gref;
+ buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
+ info->persistent_gnts_c--;
+ } else {
+ ref = gnttab_claim_grant_reference(&gref_head);
+ BUG_ON(ref == -ENOSPC);
+
+ gnt_list_entry =
+ kmalloc(sizeof(struct grant),
+ GFP_ATOMIC);
+ if (!gnt_list_entry)
+ return -ENOMEM;
+
+ granted_page = alloc_page(GFP_ATOMIC);
+ if (!granted_page) {
+ kfree(gnt_list_entry);
+ return -ENOMEM;
+ }
+
+ gnt_list_entry->pfn =
+ page_to_pfn(granted_page);
+ gnt_list_entry->gref = ref;
+
+ buffer_mfn = pfn_to_mfn(page_to_pfn(
+ granted_page));
+ gnttab_grant_foreign_access_ref(ref,
info->xbdev->otherend_id,
- buffer_mfn,
- rq_data_dir(req));
+ buffer_mfn, 0);
+ }
+
+ info->shadow[id].grants_used[i] = gnt_list_entry;
+
+ if (rq_data_dir(req)) {
+ char *bvec_data;
+ void *shared_data;
+
+ BUG_ON(sg->offset + sg->length > PAGE_SIZE);
+
+ shared_data = kmap_atomic(
+ pfn_to_page(gnt_list_entry->pfn));
+ bvec_data = kmap_atomic(sg_page(sg));
+
+ /*
+ * this does not wipe data stored outside the
+ * range sg->offset..sg->offset+sg->length.
+ * Therefore, blkback *could* see data from
+ * previous requests. This is OK as long as
+ * persistent grants are shared with just one
+ * domain. It may need refactoring if this
+ * changes
+ */
+ memcpy(shared_data + sg->offset,
+ bvec_data + sg->offset,
+ sg->length);
+
+ kunmap_atomic(bvec_data);
+ kunmap_atomic(shared_data);
+ }
info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
ring_req->u.rw.seg[i] =
@@ -368,7 +449,8 @@ static int blkif_queue_request(struct request *req)
/* Keep a private copy so we can reissue requests when recovering. */
info->shadow[id].req = *ring_req;
- gnttab_free_grant_references(gref_head);
+ if (new_persistent_gnts)
+ gnttab_free_grant_references(gref_head);
return 0;
}
@@ -480,12 +562,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
static void xlvbd_flush(struct blkfront_info *info)
{
blk_queue_flush(info->rq, info->feature_flush);
- printk(KERN_INFO "blkfront: %s: %s: %s\n",
+ printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
info->gd->disk_name,
info->flush_op == BLKIF_OP_WRITE_BARRIER ?
"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
"flush diskcache" : "barrier or flush"),
- info->feature_flush ? "enabled" : "disabled");
+ info->feature_flush ? "enabled" : "disabled",
+ info->feature_persistent ? "using persistent grants" : "");
}
static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -707,6 +790,9 @@ static void blkif_restart_queue(struct work_struct *work)
static void blkif_free(struct blkfront_info *info, int suspend)
{
+ struct llist_node *all_gnts;
+ struct grant *persistent_gnt;
+
/* Prevent new requests being issued until we fix things up. */
spin_lock_irq(&info->io_lock);
info->connected = suspend ?
@@ -714,6 +800,18 @@ static void blkif_free(struct blkfront_info *info, int suspend)
/* No more blkif_request(). */
if (info->rq)
blk_stop_queue(info->rq);
+
+ /* Remove all persistent grants */
+ if (info->persistent_gnts_c) {
+ all_gnts = llist_del_all(&info->persistent_gnts);
+ llist_for_each_entry(persistent_gnt, all_gnts, node) {
+ gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+ __free_page(pfn_to_page(persistent_gnt->pfn));
+ kfree(persistent_gnt);
+ }
+ info->persistent_gnts_c = 0;
+ }
+
/* No more gnttab callback work. */
gnttab_cancel_free_callback(&info->callback);
spin_unlock_irq(&info->io_lock);
@@ -734,13 +832,43 @@ static void blkif_free(struct blkfront_info *info, int suspend)
}
-static void blkif_completion(struct blk_shadow *s)
+static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
+ struct blkif_response *bret)
{
int i;
- /* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place
- * flag. */
- for (i = 0; i < s->req.u.rw.nr_segments; i++)
- gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
+ struct bio_vec *bvec;
+ struct req_iterator iter;
+ unsigned long flags;
+ char *bvec_data;
+ void *shared_data;
+ unsigned int offset = 0;
+
+ if (bret->operation == BLKIF_OP_READ) {
+ /*
+ * Copy the data received from the backend into the bvec.
+ * Since bv_offset can be different than 0, and bv_len different
+ * than PAGE_SIZE, we have to keep track of the current offset,
+ * to be sure we are copying the data from the right shared page.
+ */
+ rq_for_each_segment(bvec, s->request, iter) {
+ BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
+ i = offset >> PAGE_SHIFT;
+ BUG_ON(i >= s->req.u.rw.nr_segments);
+ shared_data = kmap_atomic(
+ pfn_to_page(s->grants_used[i]->pfn));
+ bvec_data = bvec_kmap_irq(bvec, &flags);
+ memcpy(bvec_data, shared_data + bvec->bv_offset,
+ bvec->bv_len);
+ bvec_kunmap_irq(bvec_data, &flags);
+ kunmap_atomic(shared_data);
+ offset += bvec->bv_len;
+ }
+ }
+ /* Add the persistent grant into the list of free grants */
+ for (i = 0; i < s->req.u.rw.nr_segments; i++) {
+ llist_add(&s->grants_used[i]->node, &info->persistent_gnts);
+ info->persistent_gnts_c++;
+ }
}
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -783,7 +911,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
req = info->shadow[id].request;
if (bret->operation != BLKIF_OP_DISCARD)
- blkif_completion(&info->shadow[id]);
+ blkif_completion(&info->shadow[id], info, bret);
if (add_id_to_freelist(info, id)) {
WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
@@ -942,6 +1070,11 @@ again:
message = "writing protocol";
goto abort_transaction;
}
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-persistent", "%u", 1);
+ if (err)
+ dev_warn(&dev->dev,
+ "writing persistent grants feature to xenbus");
err = xenbus_transaction_end(xbt, 0);
if (err) {
@@ -1029,6 +1162,8 @@ static int blkfront_probe(struct xenbus_device *dev,
spin_lock_init(&info->io_lock);
info->xbdev = dev;
info->vdevice = vdevice;
+ init_llist_head(&info->persistent_gnts);
+ info->persistent_gnts_c = 0;
info->connected = BLKIF_STATE_DISCONNECTED;
INIT_WORK(&info->work, blkif_restart_queue);
@@ -1093,7 +1228,7 @@ static int blkif_recover(struct blkfront_info *info)
req->u.rw.seg[j].gref,
info->xbdev->otherend_id,
pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
- rq_data_dir(info->shadow[req->u.rw.id].request));
+ 0);
}
info->shadow[req->u.rw.id].req = *req;
@@ -1225,7 +1360,7 @@ static void blkfront_connect(struct blkfront_info *info)
unsigned long sector_size;
unsigned int binfo;
int err;
- int barrier, flush, discard;
+ int barrier, flush, discard, persistent;
switch (info->connected) {
case BLKIF_STATE_CONNECTED:
@@ -1303,6 +1438,14 @@ static void blkfront_connect(struct blkfront_info *info)
if (!err && discard)
blkfront_setup_discard(info);
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "feature-persistent", "%u", &persistent,
+ NULL);
+ if (err)
+ info->feature_persistent = 0;
+ else
+ info->feature_persistent = persistent;
+
err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",