From dfc07b13dcacefda6ebdea14584ed8724dc980ef Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 18 Apr 2011 14:24:23 -0400 Subject: xen/blkback: Move it from drivers/xen to drivers/block .. and modify the Makefile and Kconfig files appropriately. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/Kconfig | 8 + drivers/block/Makefile | 1 + drivers/block/xen-blkback/Makefile | 3 + drivers/block/xen-blkback/blkback.c | 759 ++++++++++++++++++++++++++++++++++ drivers/block/xen-blkback/common.h | 142 +++++++ drivers/block/xen-blkback/interface.c | 185 +++++++++ drivers/block/xen-blkback/vbd.c | 162 ++++++++ drivers/block/xen-blkback/xenbus.c | 562 +++++++++++++++++++++++++ 8 files changed, 1822 insertions(+) create mode 100644 drivers/block/xen-blkback/Makefile create mode 100644 drivers/block/xen-blkback/blkback.c create mode 100644 drivers/block/xen-blkback/common.h create mode 100644 drivers/block/xen-blkback/interface.c create mode 100644 drivers/block/xen-blkback/vbd.c create mode 100644 drivers/block/xen-blkback/xenbus.c (limited to 'drivers/block') diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 83c32cb72582..9abb64689712 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -470,6 +470,14 @@ config XEN_BLKDEV_FRONTEND block device driver. It communicates with a back-end driver in another domain which drives the actual block device. +config XEN_BLKDEV_BACKEND + tristate "Block-device backend driver" + depends on XEN_BACKEND + help + The block-device backend driver allows the kernel to export its + block devices to other guests via a high-performance shared-memory + interface. + config VIRTIO_BLK tristate "Virtio block driver (EXPERIMENTAL)" depends on EXPERIMENTAL && VIRTIO diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 40528ba56d1b..76646e9a1c91 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_BLK_DEV_UB) += ub.o obj-$(CONFIG_BLK_DEV_HD) += hd.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o +obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/ obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o diff --git a/drivers/block/xen-blkback/Makefile b/drivers/block/xen-blkback/Makefile new file mode 100644 index 000000000000..f1ae1ff07a4d --- /dev/null +++ b/drivers/block/xen-blkback/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o + +xen-blkback-y := blkback.o xenbus.o interface.o vbd.o diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c new file mode 100644 index 000000000000..59a2bae0f35e --- /dev/null +++ b/drivers/block/xen-blkback/blkback.c @@ -0,0 +1,759 @@ +/****************************************************************************** + * + * Back-end of the driver for virtual block devices. This portion of the + * driver exports a 'unified' block-device interface that can be accessed + * by any operating system that implements a compatible front end. A + * reference front-end implementation can be found in: + * drivers/block/xen-blkfront.c + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Copyright (c) 2005, Christopher Clark + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "common.h" + +#define WRITE_BARRIER (REQ_WRITE | REQ_FLUSH | REQ_FUA) + +/* + * These are rather arbitrary. They are fairly large because adjacent requests + * pulled from a communication ring are quite likely to end up being part of + * the same scatter/gather request at the disc. + * + * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** + * + * This will increase the chances of being able to write whole tracks. + * 64 should be enough to keep us competitive with Linux. + */ +static int blkif_reqs = 64; +module_param_named(reqs, blkif_reqs, int, 0); +MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); + +/* Run-time switchable: /sys/module/blkback/parameters/ */ +static unsigned int log_stats; +static unsigned int debug_lvl; +module_param(log_stats, int, 0644); +module_param(debug_lvl, int, 0644); + +/* + * Each outstanding request that we've passed to the lower device layers has a + * 'pending_req' allocated to it. Each buffer_head that completes decrements + * the pendcnt towards zero. When it hits zero, the specified domain has a + * response queued for it, with the saved 'id' passed back. + */ +struct pending_req { + struct blkif_st *blkif; + u64 id; + int nr_pages; + atomic_t pendcnt; + unsigned short operation; + int status; + struct list_head free_list; +}; + +#define BLKBACK_INVALID_HANDLE (~0) + +struct xen_blkbk { + struct pending_req *pending_reqs; + /* List of all 'pending_req' available */ + struct list_head pending_free; + /* And its spinlock. */ + spinlock_t pending_free_lock; + wait_queue_head_t pending_free_wq; + /* The list of all pages that are available. */ + struct page **pending_pages; + /* And the grant handles that are available. */ + grant_handle_t *pending_grant_handles; +}; + +static struct xen_blkbk *blkbk; + +/* + * Little helpful macro to figure out the index and virtual address of the + * pending_pages[..]. For each 'pending_req' we have have up to + * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through + * 10 and would index in the pending_pages[..]. */ +static inline int vaddr_pagenr(struct pending_req *req, int seg) +{ + return (req - blkbk->pending_reqs) * + BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; +} + +#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] + +static inline unsigned long vaddr(struct pending_req *req, int seg) +{ + unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); + return (unsigned long)pfn_to_kaddr(pfn); +} + +#define pending_handle(_req, _seg) \ + (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) + + +static int do_block_io_op(struct blkif_st *blkif); +static void dispatch_rw_block_io(struct blkif_st *blkif, + struct blkif_request *req, + struct pending_req *pending_req); +static void make_response(struct blkif_st *blkif, u64 id, + unsigned short op, int st); + +/* + * Retrieve from the 'pending_reqs' a free pending_req structure to be used. + */ +static struct pending_req *alloc_req(void) +{ + struct pending_req *req = NULL; + unsigned long flags; + + spin_lock_irqsave(&blkbk->pending_free_lock, flags); + if (!list_empty(&blkbk->pending_free)) { + req = list_entry(blkbk->pending_free.next, struct pending_req, + free_list); + list_del(&req->free_list); + } + spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); + return req; +} + +/* + * Return the 'pending_req' structure back to the freepool. We also + * wake up the thread if it was waiting for a free page. + */ +static void free_req(struct pending_req *req) +{ + unsigned long flags; + int was_empty; + + spin_lock_irqsave(&blkbk->pending_free_lock, flags); + was_empty = list_empty(&blkbk->pending_free); + list_add(&req->free_list, &blkbk->pending_free); + spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); + if (was_empty) + wake_up(&blkbk->pending_free_wq); +} + +/* + * Notification from the guest OS. + */ +static void blkif_notify_work(struct blkif_st *blkif) +{ + blkif->waiting_reqs = 1; + wake_up(&blkif->wq); +} + +irqreturn_t blkif_be_int(int irq, void *dev_id) +{ + blkif_notify_work(dev_id); + return IRQ_HANDLED; +} + +/* + * SCHEDULER FUNCTIONS + */ + +static void print_stats(struct blkif_st *blkif) +{ + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n", + current->comm, blkif->st_oo_req, + blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req); + blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); + blkif->st_rd_req = 0; + blkif->st_wr_req = 0; + blkif->st_oo_req = 0; +} + +int blkif_schedule(void *arg) +{ + struct blkif_st *blkif = arg; + struct vbd *vbd = &blkif->vbd; + + blkif_get(blkif); + + if (debug_lvl) + printk(KERN_DEBUG "%s: started\n", current->comm); + + while (!kthread_should_stop()) { + if (try_to_freeze()) + continue; + if (unlikely(vbd->size != vbd_size(vbd))) + vbd_resize(blkif); + + wait_event_interruptible( + blkif->wq, + blkif->waiting_reqs || kthread_should_stop()); + wait_event_interruptible( + blkbk->pending_free_wq, + !list_empty(&blkbk->pending_free) || + kthread_should_stop()); + + blkif->waiting_reqs = 0; + smp_mb(); /* clear flag *before* checking for work */ + + if (do_block_io_op(blkif)) + blkif->waiting_reqs = 1; + + if (log_stats && time_after(jiffies, blkif->st_print)) + print_stats(blkif); + } + + if (log_stats) + print_stats(blkif); + if (debug_lvl) + printk(KERN_DEBUG "%s: exiting\n", current->comm); + + blkif->xenblkd = NULL; + blkif_put(blkif); + + return 0; +} + +struct seg_buf { + unsigned long buf; + unsigned int nsec; +}; +/* + * Unmap the grant references, and also remove the M2P over-rides + * used in the 'pending_req'. +*/ +static void xen_blkbk_unmap(struct pending_req *req) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int i, invcount = 0; + grant_handle_t handle; + int ret; + + for (i = 0; i < req->nr_pages; i++) { + handle = pending_handle(req, i); + if (handle == BLKBACK_INVALID_HANDLE) + continue; + gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), + GNTMAP_host_map, handle); + pending_handle(req, i) = BLKBACK_INVALID_HANDLE; + invcount++; + } + + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + BUG_ON(ret); + /* Note, we use invcount, so nr->pages, so we can't index + * using vaddr(req, i). + */ + for (i = 0; i < invcount; i++) { + ret = m2p_remove_override( + virt_to_page(unmap[i].host_addr), false); + if (ret) { + printk(KERN_ALERT "Failed to remove M2P override for " \ + "%lx\n", (unsigned long)unmap[i].host_addr); + continue; + } + } +} +static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_req, + struct seg_buf seg[]) +{ + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int i; + int nseg = req->nr_segments; + int ret = 0; + /* Fill out preq.nr_sects with proper amount of sectors, and setup + * assign map[..] with the PFN of the page in our domain with the + * corresponding grant reference for each page. + */ + for (i = 0; i < nseg; i++) { + uint32_t flags; + + flags = GNTMAP_host_map; + if (pending_req->operation != BLKIF_OP_READ) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, + req->u.rw.seg[i].gref, pending_req->blkif->domid); + } + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); + BUG_ON(ret); + + /* Now swizzel the MFN in our domain with the MFN from the other domain + * so that when we access vaddr(pending_req,i) it has the contents of + * the page from the other domain. + */ + for (i = 0; i < nseg; i++) { + if (unlikely(map[i].status != 0)) { + DPRINTK("invalid buffer -- could not remap it\n"); + map[i].handle = BLKBACK_INVALID_HANDLE; + ret |= 1; + } + + pending_handle(pending_req, i) = map[i].handle; + + if (ret) + continue; + + ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), + blkbk->pending_page(pending_req, i), false); + if (ret) { + printk(KERN_ALERT "Failed to install M2P override for"\ + " %lx (ret: %d)\n", (unsigned long) + map[i].dev_bus_addr, ret); + /* We could switch over to GNTTABOP_copy */ + continue; + } + + seg[i].buf = map[i].dev_bus_addr | + (req->u.rw.seg[i].first_sect << 9); + } + return ret; +} + +/* + * Completion callback on the bio's. Called as bh->b_end_io() + */ + +static void __end_block_io_op(struct pending_req *pending_req, int error) +{ + /* An error fails the entire request. */ + if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && + (error == -EOPNOTSUPP)) { + DPRINTK("blkback: write barrier op failed, not supported\n"); + blkback_barrier(XBT_NIL, pending_req->blkif->be, 0); + pending_req->status = BLKIF_RSP_EOPNOTSUPP; + } else if (error) { + DPRINTK("Buffer not up-to-date at end of operation, " + "error=%d\n", error); + pending_req->status = BLKIF_RSP_ERROR; + } + + /* If all of the bio's have completed it is time to unmap + * the grant references associated with 'request' and provide + * the proper response on the ring. + */ + if (atomic_dec_and_test(&pending_req->pendcnt)) { + xen_blkbk_unmap(pending_req); + make_response(pending_req->blkif, pending_req->id, + pending_req->operation, pending_req->status); + blkif_put(pending_req->blkif); + free_req(pending_req); + } +} + +/* + * bio callback. + */ +static void end_block_io_op(struct bio *bio, int error) +{ + __end_block_io_op(bio->bi_private, error); + bio_put(bio); +} + + + +/* + * Function to copy the from the ring buffer the 'struct blkif_request' + * (which has the sectors we want, number of them, grant references, etc), + * and transmute it to the block API to hand it over to the proper block disk. + */ +static int do_block_io_op(struct blkif_st *blkif) +{ + union blkif_back_rings *blk_rings = &blkif->blk_rings; + struct blkif_request req; + struct pending_req *pending_req; + RING_IDX rc, rp; + int more_to_do = 0; + + rc = blk_rings->common.req_cons; + rp = blk_rings->common.sring->req_prod; + rmb(); /* Ensure we see queued requests up to 'rp'. */ + + while (rc != rp) { + + if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) + break; + + if (kthread_should_stop()) { + more_to_do = 1; + break; + } + + pending_req = alloc_req(); + if (NULL == pending_req) { + blkif->st_oo_req++; + more_to_do = 1; + break; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req)); + break; + case BLKIF_PROTOCOL_X86_32: + blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc)); + break; + case BLKIF_PROTOCOL_X86_64: + blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc)); + break; + default: + BUG(); + } + blk_rings->common.req_cons = ++rc; /* before make_response() */ + + /* Apply all sanity checks to /private copy/ of request. */ + barrier(); + + switch (req.operation) { + case BLKIF_OP_READ: + blkif->st_rd_req++; + dispatch_rw_block_io(blkif, &req, pending_req); + break; + case BLKIF_OP_WRITE_BARRIER: + blkif->st_br_req++; + /* fall through */ + case BLKIF_OP_WRITE: + blkif->st_wr_req++; + dispatch_rw_block_io(blkif, &req, pending_req); + break; + default: + /* A good sign something is wrong: sleep for a while to + * avoid excessive CPU consumption by a bad guest. */ + msleep(1); + DPRINTK("error: unknown block io operation [%d]\n", + req.operation); + make_response(blkif, req.id, req.operation, + BLKIF_RSP_ERROR); + free_req(pending_req); + break; + } + + /* Yield point for this unbounded loop. */ + cond_resched(); + } + + return more_to_do; +} + +/* + * Transumation of the 'struct blkif_request' to a proper 'struct bio' + * and call the 'submit_bio' to pass it to the underlaying storage. + */ +static void dispatch_rw_block_io(struct blkif_st *blkif, + struct blkif_request *req, + struct pending_req *pending_req) +{ + struct phys_req preq; + struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int nseg; + struct bio *bio = NULL; + struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int i, nbio = 0; + int operation; + struct blk_plug plug; + + switch (req->operation) { + case BLKIF_OP_READ: + operation = READ; + break; + case BLKIF_OP_WRITE: + operation = WRITE; + break; + case BLKIF_OP_WRITE_BARRIER: + operation = WRITE_BARRIER; + break; + default: + operation = 0; /* make gcc happy */ + BUG(); + } + + /* Check that the number of segments is sane. */ + nseg = req->nr_segments; + if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { + DPRINTK("Bad number of segments in request (%d)\n", nseg); + /* Haven't submitted any bio's yet. */ + goto fail_response; + } + + preq.dev = req->handle; + preq.sector_number = req->u.rw.sector_number; + preq.nr_sects = 0; + + pending_req->blkif = blkif; + pending_req->id = req->id; + pending_req->operation = req->operation; + pending_req->status = BLKIF_RSP_OKAY; + pending_req->nr_pages = nseg; + + for (i = 0; i < nseg; i++) { + seg[i].nsec = req->u.rw.seg[i].last_sect - + req->u.rw.seg[i].first_sect + 1; + if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || + (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) + goto fail_response; + preq.nr_sects += seg[i].nsec; + + } + + if (vbd_translate(&preq, blkif, operation) != 0) { + DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", + operation == READ ? "read" : "write", + preq.sector_number, + preq.sector_number + preq.nr_sects, preq.dev); + goto fail_response; + } + /* This check _MUST_ be done after vbd_translate as the preq.bdev + * is set there. */ + for (i = 0; i < nseg; i++) { + if (((int)preq.sector_number|(int)seg[i].nsec) & + ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { + DPRINTK("Misaligned I/O request from domain %d", + blkif->domid); + goto fail_response; + } + } + /* If we have failed at this point, we need to undo the M2P override, + * set gnttab_set_unmap_op on all of the grant references and perform + * the hypercall to unmap the grants - that is all done in + * xen_blkbk_unmap. + */ + if (xen_blkbk_map(req, pending_req, seg)) + goto fail_flush; + + /* This corresponding blkif_put is done in __end_block_io_op */ + blkif_get(blkif); + + for (i = 0; i < nseg; i++) { + while ((bio == NULL) || + (bio_add_page(bio, + blkbk->pending_page(pending_req, i), + seg[i].nsec << 9, + seg[i].buf & ~PAGE_MASK) == 0)) { + + bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i); + if (unlikely(bio == NULL)) + goto fail_put_bio; + + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; + bio->bi_sector = preq.sector_number; + } + + preq.sector_number += seg[i].nsec; + } + + /* This will be hit if the operation was a barrier. */ + if (!bio) { + BUG_ON(operation != WRITE_BARRIER); + bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0); + if (unlikely(bio == NULL)) + goto fail_put_bio; + + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; + bio->bi_sector = -1; + } + + + /* We set it one so that the last submit_bio does not have to call + * atomic_inc. + */ + atomic_set(&pending_req->pendcnt, nbio); + + /* Get a reference count for the disk queue and start sending I/O */ + blk_start_plug(&plug); + + for (i = 0; i < nbio; i++) + submit_bio(operation, biolist[i]); + + blk_finish_plug(&plug); + /* Let the I/Os go.. */ + + if (operation == READ) + blkif->st_rd_sect += preq.nr_sects; + else if (operation == WRITE || operation == WRITE_BARRIER) + blkif->st_wr_sect += preq.nr_sects; + + return; + + fail_flush: + xen_blkbk_unmap(pending_req); + fail_response: + /* Haven't submitted any bio's yet. */ + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); + free_req(pending_req); + msleep(1); /* back off a bit */ + return; + + fail_put_bio: + for (i = 0; i < (nbio-1); i++) + bio_put(biolist[i]); + __end_block_io_op(pending_req, -EINVAL); + msleep(1); /* back off a bit */ + return; +} + + + +/* + * Put a response on the ring on how the operation fared. + */ +static void make_response(struct blkif_st *blkif, u64 id, + unsigned short op, int st) +{ + struct blkif_response resp; + unsigned long flags; + union blkif_back_rings *blk_rings = &blkif->blk_rings; + int more_to_do = 0; + int notify; + + resp.id = id; + resp.operation = op; + resp.status = st; + + spin_lock_irqsave(&blkif->blk_ring_lock, flags); + /* Place on the response ring for the relevant domain. */ + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + case BLKIF_PROTOCOL_X86_32: + memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + case BLKIF_PROTOCOL_X86_64: + memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + default: + BUG(); + } + blk_rings->common.rsp_prod_pvt++; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); + if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) { + /* + * Tail check for pending requests. Allows frontend to avoid + * notifications if requests are already in flight (lower + * overheads and promotes batching). + */ + RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); + + } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) { + more_to_do = 1; + } + + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); + + if (more_to_do) + blkif_notify_work(blkif); + if (notify) + notify_remote_via_irq(blkif->irq); +} + +static int __init blkif_init(void) +{ + int i, mmap_pages; + int rc = 0; + + if (!xen_pv_domain()) + return -ENODEV; + + blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); + if (!blkbk) { + printk(KERN_ALERT "%s: out of memory!\n", __func__); + return -ENOMEM; + } + + mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; + + blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * + blkif_reqs, GFP_KERNEL); + blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) * + mmap_pages, GFP_KERNEL); + blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * + mmap_pages, GFP_KERNEL); + + if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || + !blkbk->pending_pages) { + rc = -ENOMEM; + goto out_of_memory; + } + + for (i = 0; i < mmap_pages; i++) { + blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; + blkbk->pending_pages[i] = alloc_page(GFP_KERNEL); + if (blkbk->pending_pages[i] == NULL) { + rc = -ENOMEM; + goto out_of_memory; + } + } + rc = blkif_interface_init(); + if (rc) + goto failed_init; + + memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs)); + + INIT_LIST_HEAD(&blkbk->pending_free); + spin_lock_init(&blkbk->pending_free_lock); + init_waitqueue_head(&blkbk->pending_free_wq); + + for (i = 0; i < blkif_reqs; i++) + list_add_tail(&blkbk->pending_reqs[i].free_list, + &blkbk->pending_free); + + rc = blkif_xenbus_init(); + if (rc) + goto failed_init; + + return 0; + + out_of_memory: + printk(KERN_ERR "%s: out of memory\n", __func__); + failed_init: + kfree(blkbk->pending_reqs); + kfree(blkbk->pending_grant_handles); + for (i = 0; i < mmap_pages; i++) { + if (blkbk->pending_pages[i]) + __free_page(blkbk->pending_pages[i]); + } + kfree(blkbk->pending_pages); + kfree(blkbk); + blkbk = NULL; + return rc; +} + +module_init(blkif_init); + +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h new file mode 100644 index 000000000000..6257c1106591 --- /dev/null +++ b/drivers/block/xen-blkback/common.h @@ -0,0 +1,142 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __BLKIF__BACKEND__COMMON_H__ +#define __BLKIF__BACKEND__COMMON_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DPRINTK(_f, _a...) \ + pr_debug("(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a) + +struct vbd { + blkif_vdev_t handle; /* what the domain refers to this vbd as */ + unsigned char readonly; /* Non-zero -> read-only */ + unsigned char type; /* VDISK_xxx */ + u32 pdevice; /* phys device that this vbd maps to */ + struct block_device *bdev; + sector_t size; /* Cached size parameter */ +}; + +struct backend_info; + +struct blkif_st { + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; + /* Physical parameters of the comms window. */ + unsigned int irq; + /* Comms information. */ + enum blkif_protocol blk_protocol; + union blkif_back_rings blk_rings; + struct vm_struct *blk_ring_area; + /* The VBD attached to this interface. */ + struct vbd vbd; + /* Back pointer to the backend_info. */ + struct backend_info *be; + /* Private fields. */ + spinlock_t blk_ring_lock; + atomic_t refcnt; + + wait_queue_head_t wq; + /* One thread per one blkif. */ + struct task_struct *xenblkd; + unsigned int waiting_reqs; + + /* statistics */ + unsigned long st_print; + int st_rd_req; + int st_wr_req; + int st_oo_req; + int st_br_req; + int st_rd_sect; + int st_wr_sect; + + wait_queue_head_t waiting_to_free; + + grant_handle_t shmem_handle; + grant_ref_t shmem_ref; +}; + +struct blkif_st *blkif_alloc(domid_t domid); +void blkif_disconnect(struct blkif_st *blkif); +void blkif_free(struct blkif_st *blkif); +int blkif_map(struct blkif_st *blkif, unsigned long shared_page, + unsigned int evtchn); +void vbd_resize(struct blkif_st *blkif); + +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define blkif_put(_b) \ + do { \ + if (atomic_dec_and_test(&(_b)->refcnt)) \ + wake_up(&(_b)->waiting_to_free);\ + } while (0) + +/* Create a vbd. */ +int vbd_create(struct blkif_st *blkif, blkif_vdev_t vdevice, unsigned major, + unsigned minor, int readonly, int cdrom); +void vbd_free(struct vbd *vbd); + +unsigned long long vbd_size(struct vbd *vbd); +unsigned int vbd_info(struct vbd *vbd); +unsigned long vbd_secsize(struct vbd *vbd); + +struct phys_req { + unsigned short dev; + unsigned short nr_sects; + struct block_device *bdev; + blkif_sector_t sector_number; +}; + +int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation); + +int blkif_interface_init(void); + +int blkif_xenbus_init(void); + +irqreturn_t blkif_be_int(int irq, void *dev_id); +int blkif_schedule(void *arg); + +int blkback_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state); + +struct xenbus_device *blkback_xenbus(struct backend_info *be); + +#endif /* __BLKIF__BACKEND__COMMON_H__ */ diff --git a/drivers/block/xen-blkback/interface.c b/drivers/block/xen-blkback/interface.c new file mode 100644 index 000000000000..163aed41e825 --- /dev/null +++ b/drivers/block/xen-blkback/interface.c @@ -0,0 +1,185 @@ +/****************************************************************************** + * Block-device interface management. + * + * Copyright (c) 2004, Keir Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "common.h" +#include +#include +#include + +static struct kmem_cache *blkif_cachep; + +struct blkif_st *blkif_alloc(domid_t domid) +{ + struct blkif_st *blkif; + + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); + if (!blkif) + return ERR_PTR(-ENOMEM); + + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + spin_lock_init(&blkif->blk_ring_lock); + atomic_set(&blkif->refcnt, 1); + init_waitqueue_head(&blkif->wq); + blkif->st_print = jiffies; + init_waitqueue_head(&blkif->waiting_to_free); + + return blkif; +} + +static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) +{ + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, shared_page, blkif->domid); + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status) { + DPRINTK(" Grant table operation failure !\n"); + return op.status; + } + + blkif->shmem_ref = shared_page; + blkif->shmem_handle = op.handle; + + return 0; +} + +static void unmap_frontend_page(struct blkif_st *blkif) +{ + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, blkif->shmem_handle); + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); +} + +int blkif_map(struct blkif_st *blkif, unsigned long shared_page, + unsigned int evtchn) +{ + int err; + + /* Already connected through? */ + if (blkif->irq) + return 0; + + blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE); + if (!blkif->blk_ring_area) + return -ENOMEM; + + err = map_frontend_page(blkif, shared_page); + if (err) { + free_vm_area(blkif->blk_ring_area); + return err; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + { + struct blkif_sring *sring; + sring = (struct blkif_sring *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_32: + { + struct blkif_x86_32_sring *sring_x86_32; + sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_64: + { + struct blkif_x86_64_sring *sring_x86_64; + sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + break; + } + default: + BUG(); + } + + err = bind_interdomain_evtchn_to_irqhandler( + blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); + if (err < 0) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + return err; + } + blkif->irq = err; + + return 0; +} + +void blkif_disconnect(struct blkif_st *blkif) +{ + if (blkif->xenblkd) { + kthread_stop(blkif->xenblkd); + blkif->xenblkd = NULL; + } + + atomic_dec(&blkif->refcnt); + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); + atomic_inc(&blkif->refcnt); + + if (blkif->irq) { + unbind_from_irqhandler(blkif->irq, blkif); + blkif->irq = 0; + } + + if (blkif->blk_rings.common.sring) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + } +} + +void blkif_free(struct blkif_st *blkif) +{ + if (!atomic_dec_and_test(&blkif->refcnt)) + BUG(); + kmem_cache_free(blkif_cachep, blkif); +} + +int __init blkif_interface_init(void) +{ + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(struct blkif_st), + 0, 0, NULL); + if (!blkif_cachep) + return -ENOMEM; + + return 0; +} diff --git a/drivers/block/xen-blkback/vbd.c b/drivers/block/xen-blkback/vbd.c new file mode 100644 index 000000000000..d0ff4cf91a34 --- /dev/null +++ b/drivers/block/xen-blkback/vbd.c @@ -0,0 +1,162 @@ +/****************************************************************************** + * Routines for managing virtual block devices (VBDs). + * + * Copyright (c) 2003-2005, Keir Fraser & Steve Hand + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "common.h" + +#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ + (_v)->bdev->bd_part->nr_sects : \ + get_capacity((_v)->bdev->bd_disk)) + +unsigned long long vbd_size(struct vbd *vbd) +{ + return vbd_sz(vbd); +} + +unsigned int vbd_info(struct vbd *vbd) +{ + return vbd->type | (vbd->readonly ? VDISK_READONLY : 0); +} + +unsigned long vbd_secsize(struct vbd *vbd) +{ + return bdev_logical_block_size(vbd->bdev); +} + +int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, unsigned major, + unsigned minor, int readonly, int cdrom) +{ + struct vbd *vbd; + struct block_device *bdev; + + vbd = &blkif->vbd; + vbd->handle = handle; + vbd->readonly = readonly; + vbd->type = 0; + + vbd->pdevice = MKDEV(major, minor); + + bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? + FMODE_READ : FMODE_WRITE, NULL); + + if (IS_ERR(bdev)) { + DPRINTK("vbd_creat: device %08x could not be opened.\n", + vbd->pdevice); + return -ENOENT; + } + + vbd->bdev = bdev; + vbd->size = vbd_size(vbd); + + if (vbd->bdev->bd_disk == NULL) { + DPRINTK("vbd_creat: device %08x doesn't exist.\n", + vbd->pdevice); + vbd_free(vbd); + return -ENOENT; + } + + if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) + vbd->type |= VDISK_CDROM; + if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) + vbd->type |= VDISK_REMOVABLE; + + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", + handle, blkif->domid); + return 0; +} + +void vbd_free(struct vbd *vbd) +{ + if (vbd->bdev) + blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); + vbd->bdev = NULL; +} + +int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) +{ + struct vbd *vbd = &blkif->vbd; + int rc = -EACCES; + + if ((operation != READ) && vbd->readonly) + goto out; + + if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) + goto out; + + req->dev = vbd->pdevice; + req->bdev = vbd->bdev; + rc = 0; + + out: + return rc; +} + +void vbd_resize(struct blkif_st *blkif) +{ + struct vbd *vbd = &blkif->vbd; + struct xenbus_transaction xbt; + int err; + struct xenbus_device *dev = blkback_xenbus(blkif->be); + unsigned long long new_size = vbd_size(vbd); + + printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", + blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); + printk(KERN_INFO "VBD Resize: new size %llu\n", new_size); + vbd->size = new_size; +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk(KERN_WARNING "Error starting transaction"); + return; + } + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", + vbd_size(vbd)); + if (err) { + printk(KERN_WARNING "Error writing new size"); + goto abort; + } + /* + * Write the current state; we will use this to synchronize + * the front-end. If the current state is "connected" the + * front-end will get the new size information online. + */ + err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); + if (err) { + printk(KERN_WARNING "Error writing the state"); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + printk(KERN_WARNING "Error ending transaction"); +abort: + xenbus_transaction_end(xbt, 1); +} diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c new file mode 100644 index 000000000000..b41ed65db2d3 --- /dev/null +++ b/drivers/block/xen-blkback/xenbus.c @@ -0,0 +1,562 @@ +/* Xenbus code for blkif backend + Copyright (C) 2005 Rusty Russell + Copyright (C) 2005 XenSource Ltd + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include "common.h" + +#undef DPRINTK +#define DPRINTK(fmt, args...) \ + pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ + __func__, __LINE__, ##args) + +struct backend_info { + struct xenbus_device *dev; + struct blkif_st *blkif; + struct xenbus_watch backend_watch; + unsigned major; + unsigned minor; + char *mode; +}; + +static void connect(struct backend_info *); +static int connect_ring(struct backend_info *); +static void backend_changed(struct xenbus_watch *, const char **, + unsigned int); + +struct xenbus_device *blkback_xenbus(struct backend_info *be) +{ + return be->dev; +} + +static int blkback_name(struct blkif_st *blkif, char *buf) +{ + char *devpath, *devname; + struct xenbus_device *dev = blkif->be->dev; + + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); + if (IS_ERR(devpath)) + return PTR_ERR(devpath); + + devname = strstr(devpath, "/dev/"); + if (devname != NULL) + devname += strlen("/dev/"); + else + devname = devpath; + + snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); + kfree(devpath); + + return 0; +} + +static void update_blkif_status(struct blkif_st *blkif) +{ + int err; + char name[TASK_COMM_LEN]; + + /* Not ready to connect? */ + if (!blkif->irq || !blkif->vbd.bdev) + return; + + /* Already connected? */ + if (blkif->be->dev->state == XenbusStateConnected) + return; + + /* Attempt to connect: exit if we fail to. */ + connect(blkif->be); + if (blkif->be->dev->state != XenbusStateConnected) + return; + + err = blkback_name(blkif, name); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); + return; + } + + err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "block flush"); + return; + } + invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); + + blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); + if (IS_ERR(blkif->xenblkd)) { + err = PTR_ERR(blkif->xenblkd); + blkif->xenblkd = NULL; + xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); + } +} + + +/* + * sysfs interface for VBD I/O requests + */ + +#define VBD_SHOW(name, format, args...) \ + static ssize_t show_##name(struct device *_dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + struct xenbus_device *dev = to_xenbus_device(_dev); \ + struct backend_info *be = dev_get_drvdata(&dev->dev); \ + \ + return sprintf(buf, format, ##args); \ + } \ + static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) + +VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); +VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); +VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); +VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req); +VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); +VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); + +static struct attribute *vbdstat_attrs[] = { + &dev_attr_oo_req.attr, + &dev_attr_rd_req.attr, + &dev_attr_wr_req.attr, + &dev_attr_br_req.attr, + &dev_attr_rd_sect.attr, + &dev_attr_wr_sect.attr, + NULL +}; + +static struct attribute_group vbdstat_group = { + .name = "statistics", + .attrs = vbdstat_attrs, +}; + +VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); +VBD_SHOW(mode, "%s\n", be->mode); + +int xenvbd_sysfs_addif(struct xenbus_device *dev) +{ + int error; + + error = device_create_file(&dev->dev, &dev_attr_physical_device); + if (error) + goto fail1; + + error = device_create_file(&dev->dev, &dev_attr_mode); + if (error) + goto fail2; + + error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group); + if (error) + goto fail3; + + return 0; + +fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); +fail2: device_remove_file(&dev->dev, &dev_attr_mode); +fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); + return error; +} + +void xenvbd_sysfs_delif(struct xenbus_device *dev) +{ + sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); + device_remove_file(&dev->dev, &dev_attr_mode); + device_remove_file(&dev->dev, &dev_attr_physical_device); +} + +static int blkback_remove(struct xenbus_device *dev) +{ + struct backend_info *be = dev_get_drvdata(&dev->dev); + + DPRINTK(""); + + if (be->major || be->minor) + xenvbd_sysfs_delif(dev); + + if (be->backend_watch.node) { + unregister_xenbus_watch(&be->backend_watch); + kfree(be->backend_watch.node); + be->backend_watch.node = NULL; + } + + if (be->blkif) { + blkif_disconnect(be->blkif); + vbd_free(&be->blkif->vbd); + blkif_free(be->blkif); + be->blkif = NULL; + } + + kfree(be); + dev_set_drvdata(&dev->dev, NULL); + return 0; +} + +int blkback_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state) +{ + struct xenbus_device *dev = be->dev; + int err; + + err = xenbus_printf(xbt, dev->nodename, "feature-barrier", + "%d", state); + if (err) + xenbus_dev_fatal(dev, err, "writing feature-barrier"); + + return err; +} + +/** + * Entry point to this code when a new device is created. Allocate the basic + * structures, and watch the store waiting for the hotplug scripts to tell us + * the device's physical major and minor numbers. Switch to InitWait. + */ +static int blkback_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err; + struct backend_info *be = kzalloc(sizeof(struct backend_info), + GFP_KERNEL); + if (!be) { + xenbus_dev_fatal(dev, -ENOMEM, + "allocating backend structure"); + return -ENOMEM; + } + be->dev = dev; + dev_set_drvdata(&dev->dev, be); + + be->blkif = blkif_alloc(dev->otherend_id); + if (IS_ERR(be->blkif)) { + err = PTR_ERR(be->blkif); + be->blkif = NULL; + xenbus_dev_fatal(dev, err, "creating block interface"); + goto fail; + } + + /* setup back pointer */ + be->blkif->be = be; + + err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, + "%s/%s", dev->nodename, "physical-device"); + if (err) + goto fail; + + err = xenbus_switch_state(dev, XenbusStateInitWait); + if (err) + goto fail; + + return 0; + +fail: + DPRINTK("failed"); + blkback_remove(dev); + return err; +} + + +/** + * Callback received when the hotplug scripts have placed the physical-device + * node. Read it and the mode node, and create a vbd. If the frontend is + * ready, connect. + */ +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + int err; + unsigned major; + unsigned minor; + struct backend_info *be + = container_of(watch, struct backend_info, backend_watch); + struct xenbus_device *dev = be->dev; + int cdrom = 0; + char *device_type; + + DPRINTK(""); + + err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", + &major, &minor); + if (XENBUS_EXIST_ERR(err)) { + /* Since this watch will fire once immediately after it is + registered, we expect this. Ignore it, and wait for the + hotplug scripts. */ + return; + } + if (err != 2) { + xenbus_dev_fatal(dev, err, "reading physical-device"); + return; + } + + if ((be->major || be->minor) && + ((be->major != major) || (be->minor != minor))) { + printk(KERN_WARNING + "blkback: changing physical device (from %x:%x to " + "%x:%x) not supported.\n", be->major, be->minor, + major, minor); + return; + } + + be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL); + if (IS_ERR(be->mode)) { + err = PTR_ERR(be->mode); + be->mode = NULL; + xenbus_dev_fatal(dev, err, "reading mode"); + return; + } + + device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL); + if (!IS_ERR(device_type)) { + cdrom = strcmp(device_type, "cdrom") == 0; + kfree(device_type); + } + + if (be->major == 0 && be->minor == 0) { + /* Front end dir is a number, which is used as the handle. */ + + char *p = strrchr(dev->otherend, '/') + 1; + long handle; + err = strict_strtoul(p, 0, &handle); + if (err) + return; + + be->major = major; + be->minor = minor; + + err = vbd_create(be->blkif, handle, major, minor, + (NULL == strchr(be->mode, 'w')), cdrom); + if (err) { + be->major = be->minor = 0; + xenbus_dev_fatal(dev, err, "creating vbd structure"); + return; + } + + err = xenvbd_sysfs_addif(dev); + if (err) { + vbd_free(&be->blkif->vbd); + be->major = be->minor = 0; + xenbus_dev_fatal(dev, err, "creating sysfs entries"); + return; + } + + /* We're potentially connected now */ + update_blkif_status(be->blkif); + } +} + + +/** + * Callback received when the frontend's state changes. + */ +static void frontend_changed(struct xenbus_device *dev, + enum xenbus_state frontend_state) +{ + struct backend_info *be = dev_get_drvdata(&dev->dev); + int err; + + DPRINTK("%s", xenbus_strstate(frontend_state)); + + switch (frontend_state) { + case XenbusStateInitialising: + if (dev->state == XenbusStateClosed) { + printk(KERN_INFO "%s: %s: prepare for reconnect\n", + __func__, dev->nodename); + xenbus_switch_state(dev, XenbusStateInitWait); + } + break; + + case XenbusStateInitialised: + case XenbusStateConnected: + /* Ensure we connect even when two watches fire in + close successsion and we miss the intermediate value + of frontend_state. */ + if (dev->state == XenbusStateConnected) + break; + + /* Enforce precondition before potential leak point. + * blkif_disconnect() is idempotent. + */ + blkif_disconnect(be->blkif); + + err = connect_ring(be); + if (err) + break; + update_blkif_status(be->blkif); + break; + + case XenbusStateClosing: + blkif_disconnect(be->blkif); + xenbus_switch_state(dev, XenbusStateClosing); + break; + + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; + /* fall through if not online */ + case XenbusStateUnknown: + /* implies blkif_disconnect() via blkback_remove() */ + device_unregister(&dev->dev); + break; + + default: + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", + frontend_state); + break; + } +} + + +/* ** Connection ** */ + + +/** + * Write the physical details regarding the block device to the store, and + * switch to Connected state. + */ +static void connect(struct backend_info *be) +{ + struct xenbus_transaction xbt; + int err; + struct xenbus_device *dev = be->dev; + + DPRINTK("%s", dev->otherend); + + /* Supply the information about the device the frontend needs */ +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + return; + } + + err = blkback_barrier(xbt, be, 1); + if (err) + goto abort; + + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", + vbd_size(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/sectors", + dev->nodename); + goto abort; + } + + /* FIXME: use a typename instead */ + err = xenbus_printf(xbt, dev->nodename, "info", "%u", + vbd_info(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/info", + dev->nodename); + goto abort; + } + err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", + vbd_secsize(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/sector-size", + dev->nodename); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + xenbus_dev_fatal(dev, err, "ending transaction"); + + err = xenbus_switch_state(dev, XenbusStateConnected); + if (err) + xenbus_dev_fatal(dev, err, "switching to Connected state", + dev->nodename); + + return; + abort: + xenbus_transaction_end(xbt, 1); +} + + +static int connect_ring(struct backend_info *be) +{ + struct xenbus_device *dev = be->dev; + unsigned long ring_ref; + unsigned int evtchn; + char protocol[64] = ""; + int err; + + DPRINTK("%s", dev->otherend); + + err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", + &ring_ref, "event-channel", "%u", &evtchn, NULL); + if (err) { + xenbus_dev_fatal(dev, err, + "reading %s/ring-ref and event-channel", + dev->otherend); + return err; + } + + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", + "%63s", protocol, NULL); + if (err) + strcpy(protocol, "unspecified, assuming native"); + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; + else { + xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); + return -1; + } + printk(KERN_INFO + "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", + ring_ref, evtchn, be->blkif->blk_protocol, protocol); + + /* Map the shared frame, irq etc. */ + err = blkif_map(be->blkif, ring_ref, evtchn); + if (err) { + xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", + ring_ref, evtchn); + return err; + } + + return 0; +} + + +/* ** Driver Registration ** */ + + +static const struct xenbus_device_id blkback_ids[] = { + { "vbd" }, + { "" } +}; + + +static struct xenbus_driver blkback = { + .name = "vbd", + .owner = THIS_MODULE, + .ids = blkback_ids, + .probe = blkback_probe, + .remove = blkback_remove, + .otherend_changed = frontend_changed +}; + + +int blkif_xenbus_init(void) +{ + return xenbus_register_backend(&blkback); +} -- cgit v1.2.3 From ee9ff8537eacb4383bf9146df6c21b9301c9baa2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Apr 2011 10:57:29 -0400 Subject: xen/blkback: Squash vbd.c,interface.c in blkback.c and xenbus.c respectivly. Daniel Stodden suggested to eliminate vbd.c and interface.c, inlining the critical bits where they belong, respectively. Leaving only blkback.c for the data- and xenbus.c for the control path. Suggested-by: Daniel Stodden Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/Makefile | 2 +- drivers/block/xen-blkback/blkback.c | 135 +++++++++++++++++++++++++ drivers/block/xen-blkback/interface.c | 185 ---------------------------------- drivers/block/xen-blkback/vbd.c | 162 ----------------------------- drivers/block/xen-blkback/xenbus.c | 151 +++++++++++++++++++++++++++ 5 files changed, 287 insertions(+), 348 deletions(-) delete mode 100644 drivers/block/xen-blkback/interface.c delete mode 100644 drivers/block/xen-blkback/vbd.c (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/Makefile b/drivers/block/xen-blkback/Makefile index f1ae1ff07a4d..e491c1b76878 100644 --- a/drivers/block/xen-blkback/Makefile +++ b/drivers/block/xen-blkback/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o -xen-blkback-y := blkback.o xenbus.o interface.o vbd.o +xen-blkback-y := blkback.o xenbus.o diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 59a2bae0f35e..63001fac9af2 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -166,6 +166,141 @@ static void free_req(struct pending_req *req) wake_up(&blkbk->pending_free_wq); } +/* + * Routines for managing virtual block devices (vbds). + */ + +#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ + (_v)->bdev->bd_part->nr_sects : \ + get_capacity((_v)->bdev->bd_disk)) + +unsigned long long vbd_size(struct vbd *vbd) +{ + return vbd_sz(vbd); +} + +unsigned int vbd_info(struct vbd *vbd) +{ + return vbd->type | (vbd->readonly ? VDISK_READONLY : 0); +} + +unsigned long vbd_secsize(struct vbd *vbd) +{ + return bdev_logical_block_size(vbd->bdev); +} + +int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, unsigned major, + unsigned minor, int readonly, int cdrom) +{ + struct vbd *vbd; + struct block_device *bdev; + + vbd = &blkif->vbd; + vbd->handle = handle; + vbd->readonly = readonly; + vbd->type = 0; + + vbd->pdevice = MKDEV(major, minor); + + bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? + FMODE_READ : FMODE_WRITE, NULL); + + if (IS_ERR(bdev)) { + DPRINTK("vbd_creat: device %08x could not be opened.\n", + vbd->pdevice); + return -ENOENT; + } + + vbd->bdev = bdev; + vbd->size = vbd_size(vbd); + + if (vbd->bdev->bd_disk == NULL) { + DPRINTK("vbd_creat: device %08x doesn't exist.\n", + vbd->pdevice); + vbd_free(vbd); + return -ENOENT; + } + + if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) + vbd->type |= VDISK_CDROM; + if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) + vbd->type |= VDISK_REMOVABLE; + + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", + handle, blkif->domid); + return 0; +} + +void vbd_free(struct vbd *vbd) +{ + if (vbd->bdev) + blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); + vbd->bdev = NULL; +} + +int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) +{ + struct vbd *vbd = &blkif->vbd; + int rc = -EACCES; + + if ((operation != READ) && vbd->readonly) + goto out; + + if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) + goto out; + + req->dev = vbd->pdevice; + req->bdev = vbd->bdev; + rc = 0; + + out: + return rc; +} + +void vbd_resize(struct blkif_st *blkif) +{ + struct vbd *vbd = &blkif->vbd; + struct xenbus_transaction xbt; + int err; + struct xenbus_device *dev = blkback_xenbus(blkif->be); + unsigned long long new_size = vbd_size(vbd); + + printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", + blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); + printk(KERN_INFO "VBD Resize: new size %llu\n", new_size); + vbd->size = new_size; +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk(KERN_WARNING "Error starting transaction"); + return; + } + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", + vbd_size(vbd)); + if (err) { + printk(KERN_WARNING "Error writing new size"); + goto abort; + } + /* + * Write the current state; we will use this to synchronize + * the front-end. If the current state is "connected" the + * front-end will get the new size information online. + */ + err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); + if (err) { + printk(KERN_WARNING "Error writing the state"); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + printk(KERN_WARNING "Error ending transaction"); +abort: + xenbus_transaction_end(xbt, 1); +} + /* * Notification from the guest OS. */ diff --git a/drivers/block/xen-blkback/interface.c b/drivers/block/xen-blkback/interface.c deleted file mode 100644 index 163aed41e825..000000000000 --- a/drivers/block/xen-blkback/interface.c +++ /dev/null @@ -1,185 +0,0 @@ -/****************************************************************************** - * Block-device interface management. - * - * Copyright (c) 2004, Keir Fraser - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "common.h" -#include -#include -#include - -static struct kmem_cache *blkif_cachep; - -struct blkif_st *blkif_alloc(domid_t domid) -{ - struct blkif_st *blkif; - - blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); - if (!blkif) - return ERR_PTR(-ENOMEM); - - memset(blkif, 0, sizeof(*blkif)); - blkif->domid = domid; - spin_lock_init(&blkif->blk_ring_lock); - atomic_set(&blkif->refcnt, 1); - init_waitqueue_head(&blkif->wq); - blkif->st_print = jiffies; - init_waitqueue_head(&blkif->waiting_to_free); - - return blkif; -} - -static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) -{ - struct gnttab_map_grant_ref op; - - gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, - GNTMAP_host_map, shared_page, blkif->domid); - - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) - BUG(); - - if (op.status) { - DPRINTK(" Grant table operation failure !\n"); - return op.status; - } - - blkif->shmem_ref = shared_page; - blkif->shmem_handle = op.handle; - - return 0; -} - -static void unmap_frontend_page(struct blkif_st *blkif) -{ - struct gnttab_unmap_grant_ref op; - - gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, - GNTMAP_host_map, blkif->shmem_handle); - - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) - BUG(); -} - -int blkif_map(struct blkif_st *blkif, unsigned long shared_page, - unsigned int evtchn) -{ - int err; - - /* Already connected through? */ - if (blkif->irq) - return 0; - - blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE); - if (!blkif->blk_ring_area) - return -ENOMEM; - - err = map_frontend_page(blkif, shared_page); - if (err) { - free_vm_area(blkif->blk_ring_area); - return err; - } - - switch (blkif->blk_protocol) { - case BLKIF_PROTOCOL_NATIVE: - { - struct blkif_sring *sring; - sring = (struct blkif_sring *)blkif->blk_ring_area->addr; - BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); - break; - } - case BLKIF_PROTOCOL_X86_32: - { - struct blkif_x86_32_sring *sring_x86_32; - sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr; - BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); - break; - } - case BLKIF_PROTOCOL_X86_64: - { - struct blkif_x86_64_sring *sring_x86_64; - sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; - BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); - break; - } - default: - BUG(); - } - - err = bind_interdomain_evtchn_to_irqhandler( - blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); - if (err < 0) { - unmap_frontend_page(blkif); - free_vm_area(blkif->blk_ring_area); - blkif->blk_rings.common.sring = NULL; - return err; - } - blkif->irq = err; - - return 0; -} - -void blkif_disconnect(struct blkif_st *blkif) -{ - if (blkif->xenblkd) { - kthread_stop(blkif->xenblkd); - blkif->xenblkd = NULL; - } - - atomic_dec(&blkif->refcnt); - wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); - atomic_inc(&blkif->refcnt); - - if (blkif->irq) { - unbind_from_irqhandler(blkif->irq, blkif); - blkif->irq = 0; - } - - if (blkif->blk_rings.common.sring) { - unmap_frontend_page(blkif); - free_vm_area(blkif->blk_ring_area); - blkif->blk_rings.common.sring = NULL; - } -} - -void blkif_free(struct blkif_st *blkif) -{ - if (!atomic_dec_and_test(&blkif->refcnt)) - BUG(); - kmem_cache_free(blkif_cachep, blkif); -} - -int __init blkif_interface_init(void) -{ - blkif_cachep = kmem_cache_create("blkif_cache", sizeof(struct blkif_st), - 0, 0, NULL); - if (!blkif_cachep) - return -ENOMEM; - - return 0; -} diff --git a/drivers/block/xen-blkback/vbd.c b/drivers/block/xen-blkback/vbd.c deleted file mode 100644 index d0ff4cf91a34..000000000000 --- a/drivers/block/xen-blkback/vbd.c +++ /dev/null @@ -1,162 +0,0 @@ -/****************************************************************************** - * Routines for managing virtual block devices (VBDs). - * - * Copyright (c) 2003-2005, Keir Fraser & Steve Hand - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "common.h" - -#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ - (_v)->bdev->bd_part->nr_sects : \ - get_capacity((_v)->bdev->bd_disk)) - -unsigned long long vbd_size(struct vbd *vbd) -{ - return vbd_sz(vbd); -} - -unsigned int vbd_info(struct vbd *vbd) -{ - return vbd->type | (vbd->readonly ? VDISK_READONLY : 0); -} - -unsigned long vbd_secsize(struct vbd *vbd) -{ - return bdev_logical_block_size(vbd->bdev); -} - -int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, unsigned major, - unsigned minor, int readonly, int cdrom) -{ - struct vbd *vbd; - struct block_device *bdev; - - vbd = &blkif->vbd; - vbd->handle = handle; - vbd->readonly = readonly; - vbd->type = 0; - - vbd->pdevice = MKDEV(major, minor); - - bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? - FMODE_READ : FMODE_WRITE, NULL); - - if (IS_ERR(bdev)) { - DPRINTK("vbd_creat: device %08x could not be opened.\n", - vbd->pdevice); - return -ENOENT; - } - - vbd->bdev = bdev; - vbd->size = vbd_size(vbd); - - if (vbd->bdev->bd_disk == NULL) { - DPRINTK("vbd_creat: device %08x doesn't exist.\n", - vbd->pdevice); - vbd_free(vbd); - return -ENOENT; - } - - if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) - vbd->type |= VDISK_CDROM; - if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) - vbd->type |= VDISK_REMOVABLE; - - DPRINTK("Successful creation of handle=%04x (dom=%u)\n", - handle, blkif->domid); - return 0; -} - -void vbd_free(struct vbd *vbd) -{ - if (vbd->bdev) - blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); - vbd->bdev = NULL; -} - -int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) -{ - struct vbd *vbd = &blkif->vbd; - int rc = -EACCES; - - if ((operation != READ) && vbd->readonly) - goto out; - - if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) - goto out; - - req->dev = vbd->pdevice; - req->bdev = vbd->bdev; - rc = 0; - - out: - return rc; -} - -void vbd_resize(struct blkif_st *blkif) -{ - struct vbd *vbd = &blkif->vbd; - struct xenbus_transaction xbt; - int err; - struct xenbus_device *dev = blkback_xenbus(blkif->be); - unsigned long long new_size = vbd_size(vbd); - - printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", - blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); - printk(KERN_INFO "VBD Resize: new size %llu\n", new_size); - vbd->size = new_size; -again: - err = xenbus_transaction_start(&xbt); - if (err) { - printk(KERN_WARNING "Error starting transaction"); - return; - } - err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", - vbd_size(vbd)); - if (err) { - printk(KERN_WARNING "Error writing new size"); - goto abort; - } - /* - * Write the current state; we will use this to synchronize - * the front-end. If the current state is "connected" the - * front-end will get the new size information online. - */ - err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); - if (err) { - printk(KERN_WARNING "Error writing the state"); - goto abort; - } - - err = xenbus_transaction_end(xbt, 0); - if (err == -EAGAIN) - goto again; - if (err) - printk(KERN_WARNING "Error ending transaction"); -abort: - xenbus_transaction_end(xbt, 1); -} diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index b41ed65db2d3..0c263a248007 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include "common.h" #undef DPRINTK @@ -36,6 +38,7 @@ struct backend_info { char *mode; }; +static struct kmem_cache *blkif_cachep; static void connect(struct backend_info *); static int connect_ring(struct backend_info *); static void backend_changed(struct xenbus_watch *, const char **, @@ -106,6 +109,154 @@ static void update_blkif_status(struct blkif_st *blkif) } } +struct blkif_st *blkif_alloc(domid_t domid) +{ + struct blkif_st *blkif; + + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); + if (!blkif) + return ERR_PTR(-ENOMEM); + + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + spin_lock_init(&blkif->blk_ring_lock); + atomic_set(&blkif->refcnt, 1); + init_waitqueue_head(&blkif->wq); + blkif->st_print = jiffies; + init_waitqueue_head(&blkif->waiting_to_free); + + return blkif; +} + +static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) +{ + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, shared_page, blkif->domid); + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status) { + DPRINTK(" Grant table operation failure !\n"); + return op.status; + } + + blkif->shmem_ref = shared_page; + blkif->shmem_handle = op.handle; + + return 0; +} + +static void unmap_frontend_page(struct blkif_st *blkif) +{ + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, blkif->shmem_handle); + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); +} + +int blkif_map(struct blkif_st *blkif, unsigned long shared_page, + unsigned int evtchn) +{ + int err; + + /* Already connected through? */ + if (blkif->irq) + return 0; + + blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE); + if (!blkif->blk_ring_area) + return -ENOMEM; + + err = map_frontend_page(blkif, shared_page); + if (err) { + free_vm_area(blkif->blk_ring_area); + return err; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + { + struct blkif_sring *sring; + sring = (struct blkif_sring *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_32: + { + struct blkif_x86_32_sring *sring_x86_32; + sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_64: + { + struct blkif_x86_64_sring *sring_x86_64; + sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + break; + } + default: + BUG(); + } + + err = bind_interdomain_evtchn_to_irqhandler( + blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); + if (err < 0) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + return err; + } + blkif->irq = err; + + return 0; +} + +void blkif_disconnect(struct blkif_st *blkif) +{ + if (blkif->xenblkd) { + kthread_stop(blkif->xenblkd); + blkif->xenblkd = NULL; + } + + atomic_dec(&blkif->refcnt); + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); + atomic_inc(&blkif->refcnt); + + if (blkif->irq) { + unbind_from_irqhandler(blkif->irq, blkif); + blkif->irq = 0; + } + + if (blkif->blk_rings.common.sring) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + } +} + +void blkif_free(struct blkif_st *blkif) +{ + if (!atomic_dec_and_test(&blkif->refcnt)) + BUG(); + kmem_cache_free(blkif_cachep, blkif); +} + +int __init blkif_interface_init(void) +{ + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(struct blkif_st), + 0, 0, NULL); + if (!blkif_cachep) + return -ENOMEM; + + return 0; +} /* * sysfs interface for VBD I/O requests -- cgit v1.2.3 From 6cd0388cd600a51a8824dc5b34f1107b367b0cac Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Apr 2011 11:01:47 -0400 Subject: xen-blkback: Remove from the copyright notice the address. There is no need for it, as the address is updated constatly in the root of the Linux kernel. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 0c263a248007..c6c5286aa813 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -12,9 +12,6 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include -- cgit v1.2.3 From 42c7841d171a2fe32005738dfebd724a90921496 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Apr 2011 11:21:43 -0400 Subject: xen-blkback: Inline some of the functions that were moved from vbd/interface.c Shuffling code around. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 78 +++---------------------------------- drivers/block/xen-blkback/common.h | 22 ++--------- drivers/block/xen-blkback/xenbus.c | 58 +++++++++++++++++++++++++-- 3 files changed, 65 insertions(+), 93 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 63001fac9af2..806c2c947c63 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -170,75 +170,9 @@ static void free_req(struct pending_req *req) * Routines for managing virtual block devices (vbds). */ -#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ - (_v)->bdev->bd_part->nr_sects : \ - get_capacity((_v)->bdev->bd_disk)) -unsigned long long vbd_size(struct vbd *vbd) -{ - return vbd_sz(vbd); -} - -unsigned int vbd_info(struct vbd *vbd) -{ - return vbd->type | (vbd->readonly ? VDISK_READONLY : 0); -} - -unsigned long vbd_secsize(struct vbd *vbd) -{ - return bdev_logical_block_size(vbd->bdev); -} - -int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, unsigned major, - unsigned minor, int readonly, int cdrom) -{ - struct vbd *vbd; - struct block_device *bdev; - - vbd = &blkif->vbd; - vbd->handle = handle; - vbd->readonly = readonly; - vbd->type = 0; - - vbd->pdevice = MKDEV(major, minor); - - bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? - FMODE_READ : FMODE_WRITE, NULL); - - if (IS_ERR(bdev)) { - DPRINTK("vbd_creat: device %08x could not be opened.\n", - vbd->pdevice); - return -ENOENT; - } - - vbd->bdev = bdev; - vbd->size = vbd_size(vbd); - - if (vbd->bdev->bd_disk == NULL) { - DPRINTK("vbd_creat: device %08x doesn't exist.\n", - vbd->pdevice); - vbd_free(vbd); - return -ENOENT; - } - - if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) - vbd->type |= VDISK_CDROM; - if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) - vbd->type |= VDISK_REMOVABLE; - - DPRINTK("Successful creation of handle=%04x (dom=%u)\n", - handle, blkif->domid); - return 0; -} - -void vbd_free(struct vbd *vbd) -{ - if (vbd->bdev) - blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); - vbd->bdev = NULL; -} - -int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) +static int vbd_translate(struct phys_req *req, struct blkif_st *blkif, + int operation) { struct vbd *vbd = &blkif->vbd; int rc = -EACCES; @@ -257,13 +191,13 @@ int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) return rc; } -void vbd_resize(struct blkif_st *blkif) +static void vbd_resize(struct blkif_st *blkif) { struct vbd *vbd = &blkif->vbd; struct xenbus_transaction xbt; int err; struct xenbus_device *dev = blkback_xenbus(blkif->be); - unsigned long long new_size = vbd_size(vbd); + unsigned long long new_size = vbd_sz(vbd); printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); @@ -276,7 +210,7 @@ again: return; } err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", - vbd_size(vbd)); + (unsigned long long)vbd_sz(vbd)); if (err) { printk(KERN_WARNING "Error writing new size"); goto abort; @@ -344,7 +278,7 @@ int blkif_schedule(void *arg) while (!kthread_should_stop()) { if (try_to_freeze()) continue; - if (unlikely(vbd->size != vbd_size(vbd))) + if (unlikely(vbd->size != vbd_sz(vbd))) vbd_resize(blkif); wait_event_interruptible( diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 6257c1106591..4b5acb3e8b24 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -95,12 +95,10 @@ struct blkif_st { grant_ref_t shmem_ref; }; -struct blkif_st *blkif_alloc(domid_t domid); -void blkif_disconnect(struct blkif_st *blkif); -void blkif_free(struct blkif_st *blkif); -int blkif_map(struct blkif_st *blkif, unsigned long shared_page, - unsigned int evtchn); -void vbd_resize(struct blkif_st *blkif); + +#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ + (_v)->bdev->bd_part->nr_sects : \ + get_capacity((_v)->bdev->bd_disk)) #define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define blkif_put(_b) \ @@ -109,24 +107,12 @@ void vbd_resize(struct blkif_st *blkif); wake_up(&(_b)->waiting_to_free);\ } while (0) -/* Create a vbd. */ -int vbd_create(struct blkif_st *blkif, blkif_vdev_t vdevice, unsigned major, - unsigned minor, int readonly, int cdrom); -void vbd_free(struct vbd *vbd); - -unsigned long long vbd_size(struct vbd *vbd); -unsigned int vbd_info(struct vbd *vbd); -unsigned long vbd_secsize(struct vbd *vbd); - struct phys_req { unsigned short dev; unsigned short nr_sects; struct block_device *bdev; blkif_sector_t sector_number; }; - -int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation); - int blkif_interface_init(void); int blkif_xenbus_init(void); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index c6c5286aa813..75bf49bd365c 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -327,6 +327,56 @@ void xenvbd_sysfs_delif(struct xenbus_device *dev) device_remove_file(&dev->dev, &dev_attr_physical_device); } + +static void vbd_free(struct vbd *vbd) +{ + if (vbd->bdev) + blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); + vbd->bdev = NULL; +} + +static int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, + unsigned major, unsigned minor, int readonly, + int cdrom) +{ + struct vbd *vbd; + struct block_device *bdev; + + vbd = &blkif->vbd; + vbd->handle = handle; + vbd->readonly = readonly; + vbd->type = 0; + + vbd->pdevice = MKDEV(major, minor); + + bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? + FMODE_READ : FMODE_WRITE, NULL); + + if (IS_ERR(bdev)) { + DPRINTK("vbd_creat: device %08x could not be opened.\n", + vbd->pdevice); + return -ENOENT; + } + + vbd->bdev = bdev; + vbd->size = vbd_sz(vbd); + + if (vbd->bdev->bd_disk == NULL) { + DPRINTK("vbd_creat: device %08x doesn't exist.\n", + vbd->pdevice); + vbd_free(vbd); + return -ENOENT; + } + + if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) + vbd->type |= VDISK_CDROM; + if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) + vbd->type |= VDISK_REMOVABLE; + + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", + handle, blkif->domid); + return 0; +} static int blkback_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(&dev->dev); @@ -595,7 +645,7 @@ again: goto abort; err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", - vbd_size(&be->blkif->vbd)); + (unsigned long long)vbd_sz(&be->blkif->vbd)); if (err) { xenbus_dev_fatal(dev, err, "writing %s/sectors", dev->nodename); @@ -604,14 +654,16 @@ again: /* FIXME: use a typename instead */ err = xenbus_printf(xbt, dev->nodename, "info", "%u", - vbd_info(&be->blkif->vbd)); + be->blkif->vbd.type | + (be->blkif->vbd.readonly ? VDISK_READONLY : 0)); if (err) { xenbus_dev_fatal(dev, err, "writing %s/info", dev->nodename); goto abort; } err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", - vbd_secsize(&be->blkif->vbd)); + (unsigned long) + bdev_logical_block_size(be->blkif->vbd.bdev)); if (err) { xenbus_dev_fatal(dev, err, "writing %s/sector-size", dev->nodename); -- cgit v1.2.3 From 8b6bf747d70e5bac1a34c8fd773230e1cfdd7546 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Apr 2011 11:50:43 -0400 Subject: xen/blkback: Prefix exposed functions with xen_ And also shorten the name if it has blkback to blkbk. This results in the symbol table (if compiled in the kernel) to be much shorter, prettier, and also easier to search for. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 36 ++++++++--------- drivers/block/xen-blkback/common.h | 18 ++++----- drivers/block/xen-blkback/xenbus.c | 80 +++++++++++++++++++------------------ 3 files changed, 68 insertions(+), 66 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 806c2c947c63..c4bc85e69d33 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -53,13 +53,13 @@ * pulled from a communication ring are quite likely to end up being part of * the same scatter/gather request at the disc. * - * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** + * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** * * This will increase the chances of being able to write whole tracks. * 64 should be enough to keep us competitive with Linux. */ -static int blkif_reqs = 64; -module_param_named(reqs, blkif_reqs, int, 0); +static int xen_blkif_reqs = 64; +module_param_named(reqs, xen_blkif_reqs, int, 0); MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); /* Run-time switchable: /sys/module/blkback/parameters/ */ @@ -196,7 +196,7 @@ static void vbd_resize(struct blkif_st *blkif) struct vbd *vbd = &blkif->vbd; struct xenbus_transaction xbt; int err; - struct xenbus_device *dev = blkback_xenbus(blkif->be); + struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be); unsigned long long new_size = vbd_sz(vbd); printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", @@ -244,7 +244,7 @@ static void blkif_notify_work(struct blkif_st *blkif) wake_up(&blkif->wq); } -irqreturn_t blkif_be_int(int irq, void *dev_id) +irqreturn_t xen_blkif_be_int(int irq, void *dev_id) { blkif_notify_work(dev_id); return IRQ_HANDLED; @@ -265,12 +265,12 @@ static void print_stats(struct blkif_st *blkif) blkif->st_oo_req = 0; } -int blkif_schedule(void *arg) +int xen_blkif_schedule(void *arg) { struct blkif_st *blkif = arg; struct vbd *vbd = &blkif->vbd; - blkif_get(blkif); + xen_blkif_get(blkif); if (debug_lvl) printk(KERN_DEBUG "%s: started\n", current->comm); @@ -305,7 +305,7 @@ int blkif_schedule(void *arg) printk(KERN_DEBUG "%s: exiting\n", current->comm); blkif->xenblkd = NULL; - blkif_put(blkif); + xen_blkif_put(blkif); return 0; } @@ -417,7 +417,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && (error == -EOPNOTSUPP)) { DPRINTK("blkback: write barrier op failed, not supported\n"); - blkback_barrier(XBT_NIL, pending_req->blkif->be, 0); + xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if (error) { DPRINTK("Buffer not up-to-date at end of operation, " @@ -433,7 +433,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) xen_blkbk_unmap(pending_req); make_response(pending_req->blkif, pending_req->id, pending_req->operation, pending_req->status); - blkif_put(pending_req->blkif); + xen_blkif_put(pending_req->blkif); free_req(pending_req); } } @@ -619,7 +619,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, goto fail_flush; /* This corresponding blkif_put is done in __end_block_io_op */ - blkif_get(blkif); + xen_blkif_get(blkif); for (i = 0; i < nseg; i++) { while ((bio == NULL) || @@ -751,7 +751,7 @@ static void make_response(struct blkif_st *blkif, u64 id, notify_remote_via_irq(blkif->irq); } -static int __init blkif_init(void) +static int __init xen_blkif_init(void) { int i, mmap_pages; int rc = 0; @@ -765,10 +765,10 @@ static int __init blkif_init(void) return -ENOMEM; } - mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; + mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * - blkif_reqs, GFP_KERNEL); + xen_blkif_reqs, GFP_KERNEL); blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) * mmap_pages, GFP_KERNEL); blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * @@ -788,7 +788,7 @@ static int __init blkif_init(void) goto out_of_memory; } } - rc = blkif_interface_init(); + rc = xen_blkif_interface_init(); if (rc) goto failed_init; @@ -798,11 +798,11 @@ static int __init blkif_init(void) spin_lock_init(&blkbk->pending_free_lock); init_waitqueue_head(&blkbk->pending_free_wq); - for (i = 0; i < blkif_reqs; i++) + for (i = 0; i < xen_blkif_reqs; i++) list_add_tail(&blkbk->pending_reqs[i].free_list, &blkbk->pending_free); - rc = blkif_xenbus_init(); + rc = xen_blkif_xenbus_init(); if (rc) goto failed_init; @@ -823,6 +823,6 @@ static int __init blkif_init(void) return rc; } -module_init(blkif_init); +module_init(xen_blkif_init); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 4b5acb3e8b24..16af388268e7 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -100,8 +100,8 @@ struct blkif_st { (_v)->bdev->bd_part->nr_sects : \ get_capacity((_v)->bdev->bd_disk)) -#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) -#define blkif_put(_b) \ +#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define xen_blkif_put(_b) \ do { \ if (atomic_dec_and_test(&(_b)->refcnt)) \ wake_up(&(_b)->waiting_to_free);\ @@ -113,16 +113,16 @@ struct phys_req { struct block_device *bdev; blkif_sector_t sector_number; }; -int blkif_interface_init(void); +int xen_blkif_interface_init(void); -int blkif_xenbus_init(void); +int xen_blkif_xenbus_init(void); -irqreturn_t blkif_be_int(int irq, void *dev_id); -int blkif_schedule(void *arg); +irqreturn_t xen_blkif_be_int(int irq, void *dev_id); +int xen_blkif_schedule(void *arg); -int blkback_barrier(struct xenbus_transaction xbt, - struct backend_info *be, int state); +int xen_blkbk_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state); -struct xenbus_device *blkback_xenbus(struct backend_info *be); +struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); #endif /* __BLKIF__BACKEND__COMMON_H__ */ diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 75bf49bd365c..64b0a1c760fb 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -35,13 +35,13 @@ struct backend_info { char *mode; }; -static struct kmem_cache *blkif_cachep; +static struct kmem_cache *xen_blkif_cachep; static void connect(struct backend_info *); static int connect_ring(struct backend_info *); static void backend_changed(struct xenbus_watch *, const char **, unsigned int); -struct xenbus_device *blkback_xenbus(struct backend_info *be) +struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be) { return be->dev; } @@ -67,7 +67,7 @@ static int blkback_name(struct blkif_st *blkif, char *buf) return 0; } -static void update_blkif_status(struct blkif_st *blkif) +static void xen_update_blkif_status(struct blkif_st *blkif) { int err; char name[TASK_COMM_LEN]; @@ -98,7 +98,7 @@ static void update_blkif_status(struct blkif_st *blkif) } invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); - blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); + blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, name); if (IS_ERR(blkif->xenblkd)) { err = PTR_ERR(blkif->xenblkd); blkif->xenblkd = NULL; @@ -106,11 +106,11 @@ static void update_blkif_status(struct blkif_st *blkif) } } -struct blkif_st *blkif_alloc(domid_t domid) +static struct blkif_st *xen_blkif_alloc(domid_t domid) { struct blkif_st *blkif; - blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); + blkif = kmem_cache_alloc(xen_blkif_cachep, GFP_KERNEL); if (!blkif) return ERR_PTR(-ENOMEM); @@ -157,8 +157,8 @@ static void unmap_frontend_page(struct blkif_st *blkif) BUG(); } -int blkif_map(struct blkif_st *blkif, unsigned long shared_page, - unsigned int evtchn) +static int xen_blkif_map(struct blkif_st *blkif, unsigned long shared_page, + unsigned int evtchn) { int err; @@ -202,8 +202,9 @@ int blkif_map(struct blkif_st *blkif, unsigned long shared_page, BUG(); } - err = bind_interdomain_evtchn_to_irqhandler( - blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); + err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn, + xen_blkif_be_int, 0, + "blkif-backend", blkif); if (err < 0) { unmap_frontend_page(blkif); free_vm_area(blkif->blk_ring_area); @@ -215,7 +216,7 @@ int blkif_map(struct blkif_st *blkif, unsigned long shared_page, return 0; } -void blkif_disconnect(struct blkif_st *blkif) +static void xen_blkif_disconnect(struct blkif_st *blkif) { if (blkif->xenblkd) { kthread_stop(blkif->xenblkd); @@ -238,18 +239,19 @@ void blkif_disconnect(struct blkif_st *blkif) } } -void blkif_free(struct blkif_st *blkif) +void xen_blkif_free(struct blkif_st *blkif) { if (!atomic_dec_and_test(&blkif->refcnt)) BUG(); - kmem_cache_free(blkif_cachep, blkif); + kmem_cache_free(xen_blkif_cachep, blkif); } -int __init blkif_interface_init(void) +int __init xen_blkif_interface_init(void) { - blkif_cachep = kmem_cache_create("blkif_cache", sizeof(struct blkif_st), - 0, 0, NULL); - if (!blkif_cachep) + xen_blkif_cachep = kmem_cache_create("blkif_cache", + sizeof(struct blkif_st), + 0, 0, NULL); + if (!xen_blkif_cachep) return -ENOMEM; return 0; @@ -377,7 +379,7 @@ static int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, handle, blkif->domid); return 0; } -static int blkback_remove(struct xenbus_device *dev) +static int xen_blkbk_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(&dev->dev); @@ -393,9 +395,9 @@ static int blkback_remove(struct xenbus_device *dev) } if (be->blkif) { - blkif_disconnect(be->blkif); + xen_blkif_disconnect(be->blkif); vbd_free(&be->blkif->vbd); - blkif_free(be->blkif); + xen_blkif_free(be->blkif); be->blkif = NULL; } @@ -404,8 +406,8 @@ static int blkback_remove(struct xenbus_device *dev) return 0; } -int blkback_barrier(struct xenbus_transaction xbt, - struct backend_info *be, int state) +int xen_blkbk_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state) { struct xenbus_device *dev = be->dev; int err; @@ -423,8 +425,8 @@ int blkback_barrier(struct xenbus_transaction xbt, * structures, and watch the store waiting for the hotplug scripts to tell us * the device's physical major and minor numbers. Switch to InitWait. */ -static int blkback_probe(struct xenbus_device *dev, - const struct xenbus_device_id *id) +static int xen_blkbk_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) { int err; struct backend_info *be = kzalloc(sizeof(struct backend_info), @@ -437,7 +439,7 @@ static int blkback_probe(struct xenbus_device *dev, be->dev = dev; dev_set_drvdata(&dev->dev, be); - be->blkif = blkif_alloc(dev->otherend_id); + be->blkif = xen_blkif_alloc(dev->otherend_id); if (IS_ERR(be->blkif)) { err = PTR_ERR(be->blkif); be->blkif = NULL; @@ -461,7 +463,7 @@ static int blkback_probe(struct xenbus_device *dev, fail: DPRINTK("failed"); - blkback_remove(dev); + xen_blkbk_remove(dev); return err; } @@ -550,7 +552,7 @@ static void backend_changed(struct xenbus_watch *watch, } /* We're potentially connected now */ - update_blkif_status(be->blkif); + xen_update_blkif_status(be->blkif); } } @@ -586,16 +588,16 @@ static void frontend_changed(struct xenbus_device *dev, /* Enforce precondition before potential leak point. * blkif_disconnect() is idempotent. */ - blkif_disconnect(be->blkif); + xen_blkif_disconnect(be->blkif); err = connect_ring(be); if (err) break; - update_blkif_status(be->blkif); + xen_update_blkif_status(be->blkif); break; case XenbusStateClosing: - blkif_disconnect(be->blkif); + xen_blkif_disconnect(be->blkif); xenbus_switch_state(dev, XenbusStateClosing); break; @@ -640,7 +642,7 @@ again: return; } - err = blkback_barrier(xbt, be, 1); + err = xen_blkbk_barrier(xbt, be, 1); if (err) goto abort; @@ -726,7 +728,7 @@ static int connect_ring(struct backend_info *be) ring_ref, evtchn, be->blkif->blk_protocol, protocol); /* Map the shared frame, irq etc. */ - err = blkif_map(be->blkif, ring_ref, evtchn); + err = xen_blkif_map(be->blkif, ring_ref, evtchn); if (err) { xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", ring_ref, evtchn); @@ -740,23 +742,23 @@ static int connect_ring(struct backend_info *be) /* ** Driver Registration ** */ -static const struct xenbus_device_id blkback_ids[] = { +static const struct xenbus_device_id xen_blkbk_ids[] = { { "vbd" }, { "" } }; -static struct xenbus_driver blkback = { +static struct xenbus_driver xen_blkbk = { .name = "vbd", .owner = THIS_MODULE, - .ids = blkback_ids, - .probe = blkback_probe, - .remove = blkback_remove, + .ids = xen_blkbk_ids, + .probe = xen_blkbk_probe, + .remove = xen_blkbk_remove, .otherend_changed = frontend_changed }; -int blkif_xenbus_init(void) +int xen_blkif_xenbus_init(void) { - return xenbus_register_backend(&blkback); + return xenbus_register_backend(&xen_blkbk); } -- cgit v1.2.3 From 97961ef46b9b5a6a7c918a38b898a7b3e49869f4 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 26 Apr 2011 12:57:59 -0400 Subject: xen/blkback: Move the plugging/unplugging to a higher level. We used to the plug/unplug on the submit_bio. But that means if within a stream of WRITE, WRITE, WRITE,...,WRITE we have one READ, it could stall the pipeline (as the 'submio_bio' could trigger the unplug_fnc to be called and stall/sync when doing the READ). Instead we want to move the unplugging when the whole (or as a much as possible) ring buffer has been processed. This also eliminates us doing plug/unplug for each request. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index c4bc85e69d33..ed85ba94b2e0 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -276,6 +276,8 @@ int xen_blkif_schedule(void *arg) printk(KERN_DEBUG "%s: started\n", current->comm); while (!kthread_should_stop()) { + struct blk_plug plug; + if (try_to_freeze()) continue; if (unlikely(vbd->size != vbd_sz(vbd))) @@ -292,9 +294,13 @@ int xen_blkif_schedule(void *arg) blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ + blk_start_plug(&plug); + if (do_block_io_op(blkif)) blkif->waiting_reqs = 1; + blk_finish_plug(&plug); + if (log_stats && time_after(jiffies, blkif->st_print)) print_stats(blkif); } @@ -547,7 +553,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int i, nbio = 0; int operation; - struct blk_plug plug; switch (req->operation) { case BLKIF_OP_READ: @@ -660,15 +665,9 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, */ atomic_set(&pending_req->pendcnt, nbio); - /* Get a reference count for the disk queue and start sending I/O */ - blk_start_plug(&plug); - for (i = 0; i < nbio; i++) submit_bio(operation, biolist[i]); - blk_finish_plug(&plug); - /* Let the I/Os go.. */ - if (operation == READ) blkif->st_rd_sect += preq.nr_sects; else if (operation == WRITE || operation == WRITE_BARRIER) -- cgit v1.2.3 From 013c3ca184851078b9c04744efd4d47e52c6ecf8 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 26 Apr 2011 16:24:18 -0400 Subject: xen/blkback: Stick REQ_SYNC on WRITEs to deal with CFQ I/O scheduler. If one runs a simple fio request with random read/write with a 20%/80% ratio, the numbers are incredibly bad when using the CFQ scheduler. IOmeter | | | | 64K, randrw | NOOP | CFQ | deadline | randrwmix=80 | | | | --------------+-------+------+----------+ blkback |103/27 |32/10 | 102/27 | --------------+-------+------+----------+ QEMU qdisk |103/27 |102/27| 102/27 | The problem as explained by Vivek Goyal was: ".. that difference is that sync vs async requests. In the case of a kernel thread submitting IO, [..] all the WRITES might be being considered as async and will go in a different queue. If you mix those with some READS, they are always sync and will go in differnet queue. In presence of sync queue, CFQ will idle and choke up WRITES in an attempt to improve latencies of READs. In case of AIO [note: this is what QEMU qdisk is doing] , [..] it is direct IO and both READS and WRITES will be considered SYNC and will go in a single queue and no choking of WRITES will take place." The solution is quite simple, tack on REQ_SYNC (which is what the WRITE_ODIRECT macro points to) and the numbers go back up. Suggested-by: Vivek Goyal --- drivers/block/xen-blkback/blkback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index ed85ba94b2e0..8583b130499a 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -559,7 +559,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, operation = READ; break; case BLKIF_OP_WRITE: - operation = WRITE; + operation = WRITE_ODIRECT; break; case BLKIF_OP_WRITE_BARRIER: operation = WRITE_BARRIER; -- cgit v1.2.3 From a19be5f0f073525306f6a4b000d90dc84065ed93 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 27 Apr 2011 12:40:11 -0400 Subject: Revert "xen/blkback: Move the plugging/unplugging to a higher level." This reverts commit 97961ef46b9b5a6a7c918a38b898a7b3e49869f4 b/c we lose about 15% performance if we do the unplugging and the end of the reading the ring buffer. --- drivers/block/xen-blkback/blkback.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 8583b130499a..eb068d0b47ea 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -276,8 +276,6 @@ int xen_blkif_schedule(void *arg) printk(KERN_DEBUG "%s: started\n", current->comm); while (!kthread_should_stop()) { - struct blk_plug plug; - if (try_to_freeze()) continue; if (unlikely(vbd->size != vbd_sz(vbd))) @@ -294,13 +292,9 @@ int xen_blkif_schedule(void *arg) blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ - blk_start_plug(&plug); - if (do_block_io_op(blkif)) blkif->waiting_reqs = 1; - blk_finish_plug(&plug); - if (log_stats && time_after(jiffies, blkif->st_print)) print_stats(blkif); } @@ -553,6 +547,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int i, nbio = 0; int operation; + struct blk_plug plug; switch (req->operation) { case BLKIF_OP_READ: @@ -665,9 +660,15 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, */ atomic_set(&pending_req->pendcnt, nbio); + /* Get a reference count for the disk queue and start sending I/O */ + blk_start_plug(&plug); + for (i = 0; i < nbio; i++) submit_bio(operation, biolist[i]); + blk_finish_plug(&plug); + /* Let the I/Os go.. */ + if (operation == READ) blkif->st_rd_sect += preq.nr_sects; else if (operation == WRITE || operation == WRITE_BARRIER) -- cgit v1.2.3 From 24f567f952aa308c3352f3340b9d296fc72bd066 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 4 May 2011 17:07:27 -0400 Subject: xen/blkback: Add support for BLKIF_OP_FLUSH_DISKCACHE and drop BLKIF_OP_WRITE_BARRIER. We drop the support for 'feature-barrier' and add in the support for the 'feature-flush-cache' if the real backend storage supports flushing. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 33 ++++++++++++++++++--------------- drivers/block/xen-blkback/common.h | 7 ++++--- drivers/block/xen-blkback/xenbus.c | 19 ++++++++++++------- 3 files changed, 34 insertions(+), 25 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index eb068d0b47ea..72ede0bf2697 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -46,8 +46,6 @@ #include #include "common.h" -#define WRITE_BARRIER (REQ_WRITE | REQ_FLUSH | REQ_FUA) - /* * These are rather arbitrary. They are fairly large because adjacent requests * pulled from a communication ring are quite likely to end up being part of @@ -256,9 +254,9 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct blkif_st *blkif) { - printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n", + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | f %4d\n", current->comm, blkif->st_oo_req, - blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req); + blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req); blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); blkif->st_rd_req = 0; blkif->st_wr_req = 0; @@ -414,10 +412,10 @@ static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_ static void __end_block_io_op(struct pending_req *pending_req, int error) { /* An error fails the entire request. */ - if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && + if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && (error == -EOPNOTSUPP)) { - DPRINTK("blkback: write barrier op failed, not supported\n"); - xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0); + DPRINTK("blkback: flush diskcache op failed, not supported\n"); + xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if (error) { DPRINTK("Buffer not up-to-date at end of operation, " @@ -506,13 +504,14 @@ static int do_block_io_op(struct blkif_st *blkif) blkif->st_rd_req++; dispatch_rw_block_io(blkif, &req, pending_req); break; - case BLKIF_OP_WRITE_BARRIER: - blkif->st_br_req++; + case BLKIF_OP_FLUSH_DISKCACHE: + blkif->st_f_req++; /* fall through */ case BLKIF_OP_WRITE: blkif->st_wr_req++; dispatch_rw_block_io(blkif, &req, pending_req); break; + case BLKIF_OP_WRITE_BARRIER: default: /* A good sign something is wrong: sleep for a while to * avoid excessive CPU consumption by a bad guest. */ @@ -556,9 +555,14 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, case BLKIF_OP_WRITE: operation = WRITE_ODIRECT; break; - case BLKIF_OP_WRITE_BARRIER: - operation = WRITE_BARRIER; + case BLKIF_OP_FLUSH_DISKCACHE: + operation = WRITE_FLUSH; + /* The frontend likes to set this to -1, which vbd_translate + * is alergic too. */ + req->u.rw.sector_number = 0; break; + case BLKIF_OP_WRITE_BARRIER: + /* Should never get here. */ default: operation = 0; /* make gcc happy */ BUG(); @@ -566,7 +570,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, /* Check that the number of segments is sane. */ nseg = req->nr_segments; - if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || + if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { DPRINTK("Bad number of segments in request (%d)\n", nseg); /* Haven't submitted any bio's yet. */ @@ -643,7 +647,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, /* This will be hit if the operation was a barrier. */ if (!bio) { - BUG_ON(operation != WRITE_BARRIER); + BUG_ON(operation != WRITE_FLUSH); bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -651,7 +655,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, bio->bi_bdev = preq.bdev; bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; - bio->bi_sector = -1; } @@ -671,7 +674,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, if (operation == READ) blkif->st_rd_sect += preq.nr_sects; - else if (operation == WRITE || operation == WRITE_BARRIER) + else if (operation == WRITE || operation == WRITE_FLUSH) blkif->st_wr_sect += preq.nr_sects; return; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 16af388268e7..af93837e1295 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -53,6 +53,7 @@ struct vbd { u32 pdevice; /* phys device that this vbd maps to */ struct block_device *bdev; sector_t size; /* Cached size parameter */ + bool flush_support; }; struct backend_info; @@ -85,7 +86,7 @@ struct blkif_st { int st_rd_req; int st_wr_req; int st_oo_req; - int st_br_req; + int st_f_req; int st_rd_sect; int st_wr_sect; @@ -120,8 +121,8 @@ int xen_blkif_xenbus_init(void); irqreturn_t xen_blkif_be_int(int irq, void *dev_id); int xen_blkif_schedule(void *arg); -int xen_blkbk_barrier(struct xenbus_transaction xbt, - struct backend_info *be, int state); +int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, + struct backend_info *be, int state); struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 64b0a1c760fb..9adcf806f83f 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -276,7 +276,7 @@ int __init xen_blkif_interface_init(void) VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); -VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req); +VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req); VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); @@ -284,7 +284,7 @@ static struct attribute *vbdstat_attrs[] = { &dev_attr_oo_req.attr, &dev_attr_rd_req.attr, &dev_attr_wr_req.attr, - &dev_attr_br_req.attr, + &dev_attr_f_req.attr, &dev_attr_rd_sect.attr, &dev_attr_wr_sect.attr, NULL @@ -343,6 +343,7 @@ static int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, { struct vbd *vbd; struct block_device *bdev; + struct request_queue *q; vbd = &blkif->vbd; vbd->handle = handle; @@ -375,6 +376,10 @@ static int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) vbd->type |= VDISK_REMOVABLE; + q = bdev_get_queue(bdev); + if (q && q->flush_flags) + vbd->flush_support = true; + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", handle, blkif->domid); return 0; @@ -406,16 +411,16 @@ static int xen_blkbk_remove(struct xenbus_device *dev) return 0; } -int xen_blkbk_barrier(struct xenbus_transaction xbt, - struct backend_info *be, int state) +int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, + struct backend_info *be, int state) { struct xenbus_device *dev = be->dev; int err; - err = xenbus_printf(xbt, dev->nodename, "feature-barrier", + err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache", "%d", state); if (err) - xenbus_dev_fatal(dev, err, "writing feature-barrier"); + xenbus_dev_fatal(dev, err, "writing feature-flush-cache"); return err; } @@ -642,7 +647,7 @@ again: return; } - err = xen_blkbk_barrier(xbt, be, 1); + err = xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support); if (err) goto abort; -- cgit v1.2.3 From fc53bf757ede292312eee10d64f4e691c8c8cebf Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 5 May 2011 13:37:23 -0400 Subject: xen/blkback: Squash the checking for operation into dispatch_rw_block_io We do a check for the operations right before calling dispatch_rw_block_io. And then we do the same check in dispatch_rw_block_io. This patch squashes those checks into the 'dispatch_rw_block_io' function. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 45 +++++++++++-------------------------- 1 file changed, 13 insertions(+), 32 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 72ede0bf2697..5f4284729a3a 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -123,9 +123,9 @@ static inline unsigned long vaddr(struct pending_req *req, int seg) static int do_block_io_op(struct blkif_st *blkif); -static void dispatch_rw_block_io(struct blkif_st *blkif, - struct blkif_request *req, - struct pending_req *pending_req); +static int dispatch_rw_block_io(struct blkif_st *blkif, + struct blkif_request *req, + struct pending_req *pending_req); static void make_response(struct blkif_st *blkif, u64 id, unsigned short op, int st); @@ -499,30 +499,8 @@ static int do_block_io_op(struct blkif_st *blkif) /* Apply all sanity checks to /private copy/ of request. */ barrier(); - switch (req.operation) { - case BLKIF_OP_READ: - blkif->st_rd_req++; - dispatch_rw_block_io(blkif, &req, pending_req); - break; - case BLKIF_OP_FLUSH_DISKCACHE: - blkif->st_f_req++; - /* fall through */ - case BLKIF_OP_WRITE: - blkif->st_wr_req++; - dispatch_rw_block_io(blkif, &req, pending_req); + if (dispatch_rw_block_io(blkif, &req, pending_req)) break; - case BLKIF_OP_WRITE_BARRIER: - default: - /* A good sign something is wrong: sleep for a while to - * avoid excessive CPU consumption by a bad guest. */ - msleep(1); - DPRINTK("error: unknown block io operation [%d]\n", - req.operation); - make_response(blkif, req.id, req.operation, - BLKIF_RSP_ERROR); - free_req(pending_req); - break; - } /* Yield point for this unbounded loop. */ cond_resched(); @@ -535,7 +513,7 @@ static int do_block_io_op(struct blkif_st *blkif) * Transumation of the 'struct blkif_request' to a proper 'struct bio' * and call the 'submit_bio' to pass it to the underlaying storage. */ -static void dispatch_rw_block_io(struct blkif_st *blkif, +static int dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, struct pending_req *pending_req) { @@ -550,22 +528,25 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, switch (req->operation) { case BLKIF_OP_READ: + blkif->st_rd_req++; operation = READ; break; case BLKIF_OP_WRITE: + blkif->st_wr_req++; operation = WRITE_ODIRECT; break; case BLKIF_OP_FLUSH_DISKCACHE: + blkif->st_f_req++; operation = WRITE_FLUSH; /* The frontend likes to set this to -1, which vbd_translate * is alergic too. */ req->u.rw.sector_number = 0; break; case BLKIF_OP_WRITE_BARRIER: - /* Should never get here. */ default: operation = 0; /* make gcc happy */ - BUG(); + goto fail_response; + break; } /* Check that the number of segments is sane. */ @@ -677,7 +658,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, else if (operation == WRITE || operation == WRITE_FLUSH) blkif->st_wr_sect += preq.nr_sects; - return; + return 0; fail_flush: xen_blkbk_unmap(pending_req); @@ -686,14 +667,14 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); free_req(pending_req); msleep(1); /* back off a bit */ - return; + return -EIO; fail_put_bio: for (i = 0; i < (nbio-1); i++) bio_put(biolist[i]); __end_block_io_op(pending_req, -EINVAL); msleep(1); /* back off a bit */ - return; + return -EIO; } -- cgit v1.2.3 From 3d68b39926b3b247d76cc4da0256e979b2b730e3 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 5 May 2011 13:42:10 -0400 Subject: xen/blkback: Fix up some of the comments. They had the wrong data or were in the wrong spot. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 5f4284729a3a..b9bdd9e43ab9 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -603,7 +603,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, if (xen_blkbk_map(req, pending_req, seg)) goto fail_flush; - /* This corresponding blkif_put is done in __end_block_io_op */ + /* This corresponding xen_blkif_put is done in __end_block_io_op */ xen_blkif_get(blkif); for (i = 0; i < nseg; i++) { @@ -626,7 +626,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, preq.sector_number += seg[i].nsec; } - /* This will be hit if the operation was a barrier. */ + /* This will be hit if the operation was a flush. */ if (!bio) { BUG_ON(operation != WRITE_FLUSH); bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0); @@ -650,8 +650,8 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, for (i = 0; i < nbio; i++) submit_bio(operation, biolist[i]); - blk_finish_plug(&plug); /* Let the I/Os go.. */ + blk_finish_plug(&plug); if (operation == READ) blkif->st_rd_sect += preq.nr_sects; -- cgit v1.2.3 From 9bd3c20487b7c13d397dc11dd51e30256bf4c9b3 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:52:54 -0500 Subject: cciss: add readl after writel in interrupt mask setting code This is to ensure the board interrupts are really off when these functions return. Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 554bbd907d14..9b494392e5d5 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -239,11 +239,13 @@ static void SA5_intr_mask(ctlr_info_t *h, unsigned long val) { /* Turn interrupts on */ h->interrupts_enabled = 1; writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); + (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); } else /* Turn them off */ { h->interrupts_enabled = 0; writel( SA5_INTR_OFF, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); + (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); } } /* @@ -257,11 +259,13 @@ static void SA5B_intr_mask(ctlr_info_t *h, unsigned long val) { /* Turn interrupts on */ h->interrupts_enabled = 1; writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); + (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); } else /* Turn them off */ { h->interrupts_enabled = 0; writel( SA5B_INTR_OFF, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); + (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); } } @@ -271,10 +275,12 @@ static void SA5_performant_intr_mask(ctlr_info_t *h, unsigned long val) if (val) { /* turn on interrupts */ h->interrupts_enabled = 1; writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); + (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); } else { h->interrupts_enabled = 0; writel(SA5_PERF_INTR_OFF, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); + (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET); } } -- cgit v1.2.3 From 62710ae1ceb839de1eebb5b4492ec8a7fbcf8d02 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:53:00 -0500 Subject: cciss: do a better job of detecting controller reset failure Detect failure of controller reset by noticing if the 32 bytes of "driver version" we store on the hardware in the config table fail to get zeroed out. Previously we noticed if the controller did not transition to "simple mode", but this did not detect reset failure if the controller was already in simple mode prior to the reset attempt (e.g. due to module parameter hpsa_simple_mode=1). Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 82 +++++++++++++++++++++++++++++++++++++++++------ drivers/block/cciss_cmd.h | 1 + 2 files changed, 73 insertions(+), 10 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 9bf13988f1a2..ed6aa83d86dd 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -194,6 +194,8 @@ static int __devinit cciss_find_cfg_addrs(struct pci_dev *pdev, static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev, unsigned long *memory_bar); static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag); +static __devinit int write_driver_ver_to_cfgtable( + CfgTable_struct __iomem *cfgtable); /* performant mode helper functions */ static void calc_bucket_map(int *bucket, int num_buckets, int nsgs, @@ -4078,6 +4080,9 @@ static int __devinit cciss_find_cfgtables(ctlr_info_t *h) cfg_base_addr_index) + cfg_offset, sizeof(h->cfgtable)); if (!h->cfgtable) return -ENOMEM; + rc = write_driver_ver_to_cfgtable(h->cfgtable); + if (rc) + return rc; /* Find performant mode table. */ trans_offset = readl(&h->cfgtable->TransMethodOffset); h->transtable = remap_pci_mem(pci_resource_start(h->pdev, @@ -4428,6 +4433,60 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev, return 0; } +static __devinit void init_driver_version(char *driver_version, int len) +{ + memset(driver_version, 0, len); + strncpy(driver_version, "cciss " DRIVER_NAME, len - 1); +} + +static __devinit int write_driver_ver_to_cfgtable( + CfgTable_struct __iomem *cfgtable) +{ + char *driver_version; + int i, size = sizeof(cfgtable->driver_version); + + driver_version = kmalloc(size, GFP_KERNEL); + if (!driver_version) + return -ENOMEM; + + init_driver_version(driver_version, size); + for (i = 0; i < size; i++) + writeb(driver_version[i], &cfgtable->driver_version[i]); + kfree(driver_version); + return 0; +} + +static __devinit void read_driver_ver_from_cfgtable( + CfgTable_struct __iomem *cfgtable, unsigned char *driver_ver) +{ + int i; + + for (i = 0; i < sizeof(cfgtable->driver_version); i++) + driver_ver[i] = readb(&cfgtable->driver_version[i]); +} + +static __devinit int controller_reset_failed( + CfgTable_struct __iomem *cfgtable) +{ + + char *driver_ver, *old_driver_ver; + int rc, size = sizeof(cfgtable->driver_version); + + old_driver_ver = kmalloc(2 * size, GFP_KERNEL); + if (!old_driver_ver) + return -ENOMEM; + driver_ver = old_driver_ver + size; + + /* After a reset, the 32 bytes of "driver version" in the cfgtable + * should have been changed, otherwise we know the reset failed. + */ + init_driver_version(old_driver_ver, size); + read_driver_ver_from_cfgtable(cfgtable, driver_ver); + rc = !memcmp(driver_ver, old_driver_ver, size); + kfree(old_driver_ver); + return rc; +} + /* This does a hard reset of the controller using PCI power management * states or using the doorbell register. */ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) @@ -4437,7 +4496,7 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) u64 cfg_base_addr_index; void __iomem *vaddr; unsigned long paddr; - u32 misc_fw_support, active_transport; + u32 misc_fw_support; int rc; CfgTable_struct __iomem *cfgtable; bool use_doorbell; @@ -4497,6 +4556,9 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) rc = -ENOMEM; goto unmap_vaddr; } + rc = write_driver_ver_to_cfgtable(cfgtable); + if (rc) + goto unmap_vaddr; /* If reset via doorbell register is supported, use that. */ misc_fw_support = readl(&cfgtable->misc_fw_support); @@ -4535,21 +4597,21 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) "failed waiting for board to become ready\n"); goto unmap_cfgtable; } - dev_info(&pdev->dev, "board ready.\n"); - /* Controller should be in simple mode at this point. If it's not, - * It means we're on one of those controllers which doesn't support - * the doorbell reset method and on which the PCI power management reset - * method doesn't work (P800, for example.) - * In those cases, don't try to proceed, as it generally doesn't work. - */ - active_transport = readl(&cfgtable->TransportActive); - if (active_transport & PERFORMANT_MODE) { + rc = controller_reset_failed(vaddr); + if (rc < 0) + goto unmap_cfgtable; + if (rc) { dev_warn(&pdev->dev, "Unable to successfully reset controller," " Ignoring controller.\n"); rc = -ENODEV; + goto unmap_cfgtable; + } else { + dev_info(&pdev->dev, "board ready.\n"); } + dev_info(&pdev->dev, "board ready.\n"); + unmap_cfgtable: iounmap(cfgtable); diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h index cd441bef031f..a2e68d21efe3 100644 --- a/drivers/block/cciss_cmd.h +++ b/drivers/block/cciss_cmd.h @@ -235,6 +235,7 @@ typedef struct _CfgTable_struct { u8 reserved[0x78 - 0x58]; u32 misc_fw_support; /* offset 0x78 */ #define MISC_FW_DOORBELL_RESET (0x02) + u8 driver_version[32]; } CfgTable_struct; struct TransTable_struct { -- cgit v1.2.3 From 54dae3432021f38cf20542ccd152dddb91c7c2d7 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:53:05 -0500 Subject: cciss: factor out command pool allocation functions Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 67 +++++++++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 31 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index ed6aa83d86dd..152cb40e2e28 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -4653,6 +4653,39 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev) return 0; } +static __devinit int cciss_allocate_cmd_pool(ctlr_info_t *h) +{ + h->cmd_pool_bits = kmalloc( + DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG) * + sizeof(unsigned long), GFP_KERNEL); + h->cmd_pool = pci_alloc_consistent(h->pdev, + h->nr_cmds * sizeof(CommandList_struct), + &(h->cmd_pool_dhandle)); + h->errinfo_pool = pci_alloc_consistent(h->pdev, + h->nr_cmds * sizeof(ErrorInfo_struct), + &(h->errinfo_pool_dhandle)); + if ((h->cmd_pool_bits == NULL) + || (h->cmd_pool == NULL) + || (h->errinfo_pool == NULL)) { + dev_err(&h->pdev->dev, "out of memory"); + return -ENOMEM; + } + return 0; +} + +static void cciss_free_cmd_pool(ctlr_info_t *h) +{ + kfree(h->cmd_pool_bits); + if (h->cmd_pool) + pci_free_consistent(h->pdev, + h->nr_cmds * sizeof(CommandList_struct), + h->cmd_pool, h->cmd_pool_dhandle); + if (h->errinfo_pool) + pci_free_consistent(h->pdev, + h->nr_cmds * sizeof(ErrorInfo_struct), + h->errinfo_pool, h->errinfo_pool_dhandle); +} + /* * This is it. Find all the controllers and register them. I really hate * stealing all these major device numbers. @@ -4745,23 +4778,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, h->devname, pdev->device, pci_name(pdev), h->intr[PERF_MODE_INT], dac ? "" : " not"); - h->cmd_pool_bits = - kmalloc(DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG) - * sizeof(unsigned long), GFP_KERNEL); - h->cmd_pool = (CommandList_struct *) - pci_alloc_consistent(h->pdev, - h->nr_cmds * sizeof(CommandList_struct), - &(h->cmd_pool_dhandle)); - h->errinfo_pool = (ErrorInfo_struct *) - pci_alloc_consistent(h->pdev, - h->nr_cmds * sizeof(ErrorInfo_struct), - &(h->errinfo_pool_dhandle)); - if ((h->cmd_pool_bits == NULL) - || (h->cmd_pool == NULL) - || (h->errinfo_pool == NULL)) { - dev_err(&h->pdev->dev, "out of memory"); + if (cciss_allocate_cmd_pool(h)) goto clean4; - } /* Need space for temp scatter list */ h->scatter_list = kmalloc(h->max_commands * @@ -4837,21 +4855,12 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, return 1; clean4: - kfree(h->cmd_pool_bits); + cciss_free_cmd_pool(h); /* Free up sg elements */ for (k-- ; k >= 0; k--) kfree(h->scatter_list[k]); kfree(h->scatter_list); cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); - if (h->cmd_pool) - pci_free_consistent(h->pdev, - h->nr_cmds * sizeof(CommandList_struct), - h->cmd_pool, h->cmd_pool_dhandle); - if (h->errinfo_pool) - pci_free_consistent(h->pdev, - h->nr_cmds * sizeof(ErrorInfo_struct), - h->errinfo_pool, - h->errinfo_pool_dhandle); free_irq(h->intr[PERF_MODE_INT], h); clean2: unregister_blkdev(h->major, h->devname); @@ -4949,11 +4958,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev) iounmap(h->cfgtable); iounmap(h->vaddr); - pci_free_consistent(h->pdev, h->nr_cmds * sizeof(CommandList_struct), - h->cmd_pool, h->cmd_pool_dhandle); - pci_free_consistent(h->pdev, h->nr_cmds * sizeof(ErrorInfo_struct), - h->errinfo_pool, h->errinfo_pool_dhandle); - kfree(h->cmd_pool_bits); + cciss_free_cmd_pool(h); /* Free up sg elements */ for (j = 0; j < h->nr_cmds; j++) kfree(h->scatter_list[j]); -- cgit v1.2.3 From abf7966e616ef6e04393ef3678227f77d6179a8a Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:53:10 -0500 Subject: cciss: factor out scatterlist allocation functions Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 55 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 20 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 152cb40e2e28..55ca1f45c0c7 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -4673,6 +4673,39 @@ static __devinit int cciss_allocate_cmd_pool(ctlr_info_t *h) return 0; } +static __devinit int cciss_allocate_scatterlists(ctlr_info_t *h) +{ + int i; + + /* zero it, so that on free we need not know how many were alloc'ed */ + h->scatter_list = kzalloc(h->max_commands * + sizeof(struct scatterlist *), GFP_KERNEL); + if (!h->scatter_list) + return -ENOMEM; + + for (i = 0; i < h->nr_cmds; i++) { + h->scatter_list[i] = kmalloc(sizeof(struct scatterlist) * + h->maxsgentries, GFP_KERNEL); + if (h->scatter_list[i] == NULL) { + dev_err(&h->pdev->dev, "could not allocate " + "s/g lists\n"); + return -ENOMEM; + } + } + return 0; +} + +static void cciss_free_scatterlists(ctlr_info_t *h) +{ + int i; + + if (h->scatter_list) { + for (i = 0; i < h->nr_cmds; i++) + kfree(h->scatter_list[i]); + kfree(h->scatter_list); + } +} + static void cciss_free_cmd_pool(ctlr_info_t *h) { kfree(h->cmd_pool_bits); @@ -4696,7 +4729,6 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, { int i; int j = 0; - int k = 0; int rc; int dac, return_code; InquiryData_struct *inq_buff; @@ -4781,23 +4813,9 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, if (cciss_allocate_cmd_pool(h)) goto clean4; - /* Need space for temp scatter list */ - h->scatter_list = kmalloc(h->max_commands * - sizeof(struct scatterlist *), - GFP_KERNEL); - if (!h->scatter_list) + if (cciss_allocate_scatterlists(h)) goto clean4; - for (k = 0; k < h->nr_cmds; k++) { - h->scatter_list[k] = kmalloc(sizeof(struct scatterlist) * - h->maxsgentries, - GFP_KERNEL); - if (h->scatter_list[k] == NULL) { - dev_err(&h->pdev->dev, - "could not allocate s/g lists\n"); - goto clean4; - } - } h->cmd_sg_list = cciss_allocate_sg_chain_blocks(h, h->chainsize, h->nr_cmds); if (!h->cmd_sg_list && h->chainsize > 0) @@ -4856,10 +4874,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, clean4: cciss_free_cmd_pool(h); - /* Free up sg elements */ - for (k-- ; k >= 0; k--) - kfree(h->scatter_list[k]); - kfree(h->scatter_list); + cciss_free_scatterlists(h); cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); free_irq(h->intr[PERF_MODE_INT], h); clean2: -- cgit v1.2.3 From 2b48085f972a761b38cf60c626031a7fdd9e6d55 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:53:16 -0500 Subject: cciss: factor out irq request code Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 55ca1f45c0c7..63fe05af2c5d 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -4719,6 +4719,28 @@ static void cciss_free_cmd_pool(ctlr_info_t *h) h->errinfo_pool, h->errinfo_pool_dhandle); } +static int cciss_request_irq(ctlr_info_t *h, + irqreturn_t (*msixhandler)(int, void *), + irqreturn_t (*intxhandler)(int, void *)) +{ + if (h->msix_vector || h->msi_vector) { + if (!request_irq(h->intr[PERF_MODE_INT], msixhandler, + IRQF_DISABLED, h->devname, h)) + return 0; + dev_err(&h->pdev->dev, "Unable to get msi irq %d" + " for %s\n", h->intr[PERF_MODE_INT], + h->devname); + return -1; + } + + if (!request_irq(h->intr[PERF_MODE_INT], intxhandler, + IRQF_DISABLED, h->devname, h)) + return 0; + dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n", + h->intr[PERF_MODE_INT], h->devname); + return -1; +} + /* * This is it. Find all the controllers and register them. I really hate * stealing all these major device numbers. @@ -4789,22 +4811,9 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, /* make sure the board interrupts are off */ h->access.set_intr_mask(h, CCISS_INTR_OFF); - if (h->msi_vector || h->msix_vector) { - if (request_irq(h->intr[PERF_MODE_INT], - do_cciss_msix_intr, - IRQF_DISABLED, h->devname, h)) { - dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n", - h->intr[PERF_MODE_INT], h->devname); - goto clean2; - } - } else { - if (request_irq(h->intr[PERF_MODE_INT], do_cciss_intx, - IRQF_DISABLED, h->devname, h)) { - dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n", - h->intr[PERF_MODE_INT], h->devname); - goto clean2; - } - } + rc = cciss_request_irq(h, do_cciss_msix_intr, do_cciss_intx); + if (rc) + goto clean2; dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n", h->devname, pdev->device, pci_name(pdev), -- cgit v1.2.3 From e363e0143615a67f19d56e6b223b55df3bd9f580 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:53:21 -0500 Subject: cciss: fix reply pool and block fetch table memory leaks Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 63fe05af2c5d..d60448941d53 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -4988,6 +4988,10 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev) kfree(h->scatter_list[j]); kfree(h->scatter_list); cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); + kfree(h->blockFetchTable); + if (h->reply_pool) + pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64), + h->reply_pool, h->reply_pool_dhandle); /* * Deliberately omit pci_disable_device(): it does something nasty to * Smart Array controllers that pci_enable_device does not undo -- cgit v1.2.3 From 8f71bb829a964ef4deead86b60fda09452fb5c2f Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:53:26 -0500 Subject: cciss: get rid of message related magic numbers Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 8 ++++---- drivers/block/cciss_cmd.h | 8 ++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index d60448941d53..1a4dff09c541 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2569,7 +2569,7 @@ static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff, } } else if (cmd_type == TYPE_MSG) { switch (cmd) { - case 0: /* ABORT message */ + case CCISS_ABORT_MSG: c->Request.CDBLen = 12; c->Request.Type.Attribute = ATTR_SIMPLE; c->Request.Type.Direction = XFER_WRITE; @@ -2579,16 +2579,16 @@ static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff, /* buff contains the tag of the command to abort */ memcpy(&c->Request.CDB[4], buff, 8); break; - case 1: /* RESET message */ + case CCISS_RESET_MSG: c->Request.CDBLen = 16; c->Request.Type.Attribute = ATTR_SIMPLE; c->Request.Type.Direction = XFER_NONE; c->Request.Timeout = 0; memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB)); c->Request.CDB[0] = cmd; /* reset */ - c->Request.CDB[1] = 0x03; /* reset a target */ + c->Request.CDB[1] = CCISS_RESET_TYPE_TARGET; break; - case 3: /* No-Op message */ + case CCISS_NOOP_MSG: c->Request.CDBLen = 1; c->Request.Type.Attribute = ATTR_SIMPLE; c->Request.Type.Direction = XFER_WRITE; diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h index a2e68d21efe3..3b20c31d746d 100644 --- a/drivers/block/cciss_cmd.h +++ b/drivers/block/cciss_cmd.h @@ -142,6 +142,14 @@ typedef struct _ReadCapdata_struct_16 #define BMIC_CACHE_FLUSH 0xc2 #define CCISS_CACHE_FLUSH 0x01 /* C2 was already being used by CCISS */ +#define CCISS_ABORT_MSG 0x00 +#define CCISS_RESET_MSG 0x01 +#define CCISS_RESET_TYPE_CONTROLLER 0x00 +#define CCISS_RESET_TYPE_BUS 0x01 +#define CCISS_RESET_TYPE_TARGET 0x03 +#define CCISS_RESET_TYPE_LUN 0x04 +#define CCISS_NOOP_MSG 0x03 + /* Command List Structure */ #define CTLR_LUNID "\0\0\0\0\0\0\0\0" -- cgit v1.2.3 From 19adbb9254cbd46224376fcb3a773a2272e0845e Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:53:31 -0500 Subject: cciss: increase time to wait for board reset to start Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 9b494392e5d5..21ec628ef700 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -200,7 +200,7 @@ struct ctlr_info * the above. */ #define CCISS_BOARD_READY_WAIT_SECS (120) -#define CCISS_BOARD_NOT_READY_WAIT_SECS (10) +#define CCISS_BOARD_NOT_READY_WAIT_SECS (100) #define CCISS_BOARD_READY_POLL_INTERVAL_MSECS (100) #define CCISS_BOARD_READY_ITERATIONS \ ((CCISS_BOARD_READY_WAIT_SECS * 1000) / \ -- cgit v1.2.3 From 59ec86bb9872fbf9fd8572a936423f5e3ad615e7 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:53:36 -0500 Subject: cciss: clarify messages around reset behavior When waiting for the board to become "not ready" don't print a message saying "waiting for board to become ready" (possibly followed by a message saying "failed waiting for board to become not ready". Instead, it should be "waiting for board to reset" and "failed waiting for board to reset." Signed-off-by: Stephen M. Cameron " Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 1a4dff09c541..3b557231e1d3 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -4586,11 +4586,11 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) msleep(CCISS_POST_RESET_PAUSE_MSECS); /* Wait for board to become not ready, then ready. */ - dev_info(&pdev->dev, "Waiting for board to become ready.\n"); + dev_info(&pdev->dev, "Waiting for board to reset.\n"); rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY); if (rc) /* Don't bail, might be E500, etc. which can't be reset */ dev_warn(&pdev->dev, - "failed waiting for board to become not ready\n"); + "failed waiting for board to reset\n"); rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_READY); if (rc) { dev_warn(&pdev->dev, @@ -4641,6 +4641,7 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev) return -ENODEV; /* Now try to get the controller to respond to a no-op */ + dev_warn(&pdev->dev, "Waiting for controller to respond to no-op\n"); for (i = 0; i < CCISS_POST_RESET_NOOP_RETRIES; i++) { if (cciss_noop(pdev) == 0) break; -- cgit v1.2.3 From 3e28601fdfdec75ce8f6aaaf58540fdd0883fb58 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:53:41 -0500 Subject: cciss: increase timeouts for post-reset no-ops Just to reduce the messages about timeouts that appear. Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 2 +- drivers/block/cciss.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 3b557231e1d3..b9f8658341cc 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -4353,7 +4353,7 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u tag = readl(vaddr + SA5_REPLY_PORT_OFFSET); if ((tag & ~3) == paddr32) break; - schedule_timeout_uninterruptible(HZ); + msleep(CCISS_POST_RESET_NOOP_TIMEOUT_MSECS); } iounmap(vaddr); diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 21ec628ef700..16b4d58d84dd 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -209,8 +209,9 @@ struct ctlr_info ((CCISS_BOARD_NOT_READY_WAIT_SECS * 1000) / \ CCISS_BOARD_READY_POLL_INTERVAL_MSECS) #define CCISS_POST_RESET_PAUSE_MSECS (3000) -#define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (1000) +#define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (4000) #define CCISS_POST_RESET_NOOP_RETRIES (12) +#define CCISS_POST_RESET_NOOP_TIMEOUT_MSECS (10000) /* Send the command to the hardware -- cgit v1.2.3 From bf2e2e6b87ae38fab460a36abfe272d99ae8be49 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:53:46 -0500 Subject: cciss: use new doorbell-bit-5 reset method The bit-2-doorbell reset method seemed to cause (survivable) NMIs on some systems and (unsurvivable) IOCK NMIs on some G7 servers. Firmware guys implemented a new doorbell method to alleviate these problems triggered by bit 5 of the doorbell register. We want to use it if it's available. Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 25 ++++++++++++++----------- drivers/block/cciss_cmd.h | 2 ++ 2 files changed, 16 insertions(+), 11 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index b9f8658341cc..df58c59d9031 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -4384,7 +4384,7 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u #define cciss_noop(p) cciss_message(p, 3, 0) static int cciss_controller_hard_reset(struct pci_dev *pdev, - void * __iomem vaddr, bool use_doorbell) + void * __iomem vaddr, u32 use_doorbell) { u16 pmcsr; int pos; @@ -4395,7 +4395,7 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev, * other way using the doorbell register. */ dev_info(&pdev->dev, "using doorbell to reset controller\n"); - writel(DOORBELL_CTLR_RESET, vaddr + SA5_DOORBELL); + writel(use_doorbell, vaddr + SA5_DOORBELL); msleep(1000); } else { /* Try to do it the PCI power state way */ @@ -4499,7 +4499,7 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) u32 misc_fw_support; int rc; CfgTable_struct __iomem *cfgtable; - bool use_doorbell; + u32 use_doorbell; u32 board_id; u16 command_register; @@ -4560,15 +4560,18 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) if (rc) goto unmap_vaddr; - /* If reset via doorbell register is supported, use that. */ - misc_fw_support = readl(&cfgtable->misc_fw_support); - use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET; - - /* The doorbell reset seems to cause lockups on some Smart - * Arrays (e.g. P410, P410i, maybe others). Until this is - * fixed or at least isolated, avoid the doorbell reset. + /* If reset via doorbell register is supported, use that. + * There are two such methods. Favor the newest method. */ - use_doorbell = 0; + misc_fw_support = readl(&cfgtable->misc_fw_support); + use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET2; + if (use_doorbell) { + use_doorbell = DOORBELL_CTLR_RESET2; + } else { + use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET; + if (use_doorbell) + use_doorbell = DOORBELL_CTLR_RESET; + } rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell); if (rc) diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h index 3b20c31d746d..d9be6b4d49a6 100644 --- a/drivers/block/cciss_cmd.h +++ b/drivers/block/cciss_cmd.h @@ -53,6 +53,7 @@ #define CFGTBL_ChangeReq 0x00000001l #define CFGTBL_AccCmds 0x00000001l #define DOORBELL_CTLR_RESET 0x00000004l +#define DOORBELL_CTLR_RESET2 0x00000020l #define CFGTBL_Trans_Simple 0x00000002l #define CFGTBL_Trans_Performant 0x00000004l @@ -243,6 +244,7 @@ typedef struct _CfgTable_struct { u8 reserved[0x78 - 0x58]; u32 misc_fw_support; /* offset 0x78 */ #define MISC_FW_DOORBELL_RESET (0x02) +#define MISC_FW_DOORBELL_RESET2 (0x10) u8 driver_version[32]; } CfgTable_struct; -- cgit v1.2.3 From 5afe278114a8dd9480813377c75b5e40a42c5066 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:53:52 -0500 Subject: cciss: do soft reset if hard reset is broken on driver load, if reset_devices is set, and the hard reset attempts fail, try to bring up the controller to the point that a command can be sent, and send it a soft reset command, then after the reset undo whatever driver initialization was done to get it to the point to take a command, and re-do it after the reset. This is to get kdump to work on all the "non-resettable" controllers (except 64xx controllers which can't be reset due to the potentially shared cache module.) Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 221 insertions(+), 15 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index df58c59d9031..23b0ba49300a 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2477,6 +2477,31 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, return 0; } +static int __devinit cciss_send_reset(ctlr_info_t *h, unsigned char *scsi3addr, + u8 reset_type) +{ + CommandList_struct *c; + int return_status; + + c = cmd_alloc(h); + if (!c) + return -ENOMEM; + return_status = fill_cmd(h, c, CCISS_RESET_MSG, NULL, 0, 0, + CTLR_LUNID, TYPE_MSG); + c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */ + if (return_status != IO_OK) { + cmd_special_free(h, c); + return return_status; + } + c->waiting = NULL; + enqueue_cmd_and_start_io(h, c); + /* Don't wait for completion, the reset won't complete. Don't free + * the command either. This is the last command we will send before + * re-initializing everything, so it doesn't matter and won't leak. + */ + return 0; +} + static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff, size_t size, __u8 page_code, unsigned char *scsi3addr, int cmd_type) @@ -3463,6 +3488,63 @@ static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag) return next_command(h); } +/* Some controllers, like p400, will give us one interrupt + * after a soft reset, even if we turned interrupts off. + * Only need to check for this in the cciss_xxx_discard_completions + * functions. + */ +static int ignore_bogus_interrupt(ctlr_info_t *h) +{ + if (likely(!reset_devices)) + return 0; + + if (likely(h->interrupts_enabled)) + return 0; + + dev_info(&h->pdev->dev, "Received interrupt while interrupts disabled " + "(known firmware bug.) Ignoring.\n"); + + return 1; +} + +static irqreturn_t cciss_intx_discard_completions(int irq, void *dev_id) +{ + ctlr_info_t *h = dev_id; + unsigned long flags; + u32 raw_tag; + + if (ignore_bogus_interrupt(h)) + return IRQ_NONE; + + if (interrupt_not_for_us(h)) + return IRQ_NONE; + spin_lock_irqsave(&h->lock, flags); + while (interrupt_pending(h)) { + raw_tag = get_next_completion(h); + while (raw_tag != FIFO_EMPTY) + raw_tag = next_command(h); + } + spin_unlock_irqrestore(&h->lock, flags); + return IRQ_HANDLED; +} + +static irqreturn_t cciss_msix_discard_completions(int irq, void *dev_id) +{ + ctlr_info_t *h = dev_id; + unsigned long flags; + u32 raw_tag; + + if (ignore_bogus_interrupt(h)) + return IRQ_NONE; + + spin_lock_irqsave(&h->lock, flags); + raw_tag = get_next_completion(h); + while (raw_tag != FIFO_EMPTY) + raw_tag = next_command(h); + spin_unlock_irqrestore(&h->lock, flags); + return IRQ_HANDLED; +} + static irqreturn_t do_cciss_intx(int irq, void *dev_id) { ctlr_info_t *h = dev_id; @@ -4380,7 +4462,6 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u return 0; } -#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0) #define cciss_noop(p) cciss_message(p, 3, 0) static int cciss_controller_hard_reset(struct pci_dev *pdev, @@ -4591,13 +4672,17 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) /* Wait for board to become not ready, then ready. */ dev_info(&pdev->dev, "Waiting for board to reset.\n"); rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY); - if (rc) /* Don't bail, might be E500, etc. which can't be reset */ - dev_warn(&pdev->dev, - "failed waiting for board to reset\n"); + if (rc) { + dev_warn(&pdev->dev, "Failed waiting for board to hard reset." + " Will try soft reset.\n"); + rc = -ENOTSUPP; /* Not expected, but try soft reset later */ + goto unmap_cfgtable; + } rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_READY); if (rc) { dev_warn(&pdev->dev, - "failed waiting for board to become ready\n"); + "failed waiting for board to become ready " + "after hard reset\n"); goto unmap_cfgtable; } @@ -4605,16 +4690,13 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) if (rc < 0) goto unmap_cfgtable; if (rc) { - dev_warn(&pdev->dev, "Unable to successfully reset controller," - " Ignoring controller.\n"); - rc = -ENODEV; - goto unmap_cfgtable; + dev_warn(&pdev->dev, "Unable to successfully hard reset " + "controller. Will try soft reset.\n"); + rc = -ENOTSUPP; /* Not expected, but try soft reset later */ } else { - dev_info(&pdev->dev, "board ready.\n"); + dev_info(&pdev->dev, "Board ready after hard reset.\n"); } - dev_info(&pdev->dev, "board ready.\n"); - unmap_cfgtable: iounmap(cfgtable); @@ -4639,7 +4721,7 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev) * due to concerns about shared bbwc between 6402/6404 pair. */ if (rc == -ENOTSUPP) - return 0; /* just try to do the kdump anyhow. */ + return rc; /* just try to do the kdump anyhow. */ if (rc) return -ENODEV; @@ -4745,6 +4827,60 @@ static int cciss_request_irq(ctlr_info_t *h, return -1; } +static int __devinit cciss_kdump_soft_reset(ctlr_info_t *h) +{ + if (cciss_send_reset(h, CTLR_LUNID, CCISS_RESET_TYPE_CONTROLLER)) { + dev_warn(&h->pdev->dev, "Resetting array controller failed.\n"); + return -EIO; + } + + dev_info(&h->pdev->dev, "Waiting for board to soft reset.\n"); + if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_NOT_READY)) { + dev_warn(&h->pdev->dev, "Soft reset had no effect.\n"); + return -1; + } + + dev_info(&h->pdev->dev, "Board reset, awaiting READY status.\n"); + if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY)) { + dev_warn(&h->pdev->dev, "Board failed to become ready " + "after soft reset.\n"); + return -1; + } + + return 0; +} + +static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h) +{ + int ctlr = h->ctlr; + + free_irq(h->intr[PERF_MODE_INT], h); +#ifdef CONFIG_PCI_MSI + if (h->msix_vector) + pci_disable_msix(h->pdev); + else if (h->msi_vector) + pci_disable_msi(h->pdev); +#endif /* CONFIG_PCI_MSI */ + cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); + cciss_free_scatterlists(h); + cciss_free_cmd_pool(h); + kfree(h->blockFetchTable); + if (h->reply_pool) + pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64), + h->reply_pool, h->reply_pool_dhandle); + if (h->transtable) + iounmap(h->transtable); + if (h->cfgtable) + iounmap(h->cfgtable); + if (h->vaddr) + iounmap(h->vaddr); + unregister_blkdev(h->major, h->devname); + cciss_destroy_hba_sysfs_entry(h); + pci_release_regions(h->pdev); + kfree(h); + hba[ctlr] = NULL; +} + /* * This is it. Find all the controllers and register them. I really hate * stealing all these major device numbers. @@ -4756,13 +4892,27 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, int i; int j = 0; int rc; + int try_soft_reset = 0; int dac, return_code; InquiryData_struct *inq_buff; ctlr_info_t *h; + unsigned long flags; rc = cciss_init_reset_devices(pdev); - if (rc) - return rc; + if (rc) { + if (rc != -ENOTSUPP) + return rc; + /* If the reset fails in a particular way (it has no way to do + * a proper hard reset, so returns -ENOTSUPP) we can try to do + * a soft reset once we get the controller configured up to the + * point that it can accept a command. + */ + try_soft_reset = 1; + rc = 0; + } + +reinit_after_soft_reset: + i = alloc_cciss_hba(pdev); if (i < 0) return -1; @@ -4852,6 +5002,62 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, h->gendisk[j] = NULL; } + /* At this point, the controller is ready to take commands. + * Now, if reset_devices and the hard reset didn't work, try + * the soft reset and see if that works. + */ + if (try_soft_reset) { + + /* This is kind of gross. We may or may not get a completion + * from the soft reset command, and if we do, then the value + * from the fifo may or may not be valid. So, we wait 10 secs + * after the reset throwing away any completions we get during + * that time. Unregister the interrupt handler and register + * fake ones to scoop up any residual completions. + */ + spin_lock_irqsave(&h->lock, flags); + h->access.set_intr_mask(h, CCISS_INTR_OFF); + spin_unlock_irqrestore(&h->lock, flags); + free_irq(h->intr[PERF_MODE_INT], h); + rc = cciss_request_irq(h, cciss_msix_discard_completions, + cciss_intx_discard_completions); + if (rc) { + dev_warn(&h->pdev->dev, "Failed to request_irq after " + "soft reset.\n"); + goto clean4; + } + + rc = cciss_kdump_soft_reset(h); + if (rc) { + dev_warn(&h->pdev->dev, "Soft reset failed.\n"); + goto clean4; + } + + dev_info(&h->pdev->dev, "Board READY.\n"); + dev_info(&h->pdev->dev, + "Waiting for stale completions to drain.\n"); + h->access.set_intr_mask(h, CCISS_INTR_ON); + msleep(10000); + h->access.set_intr_mask(h, CCISS_INTR_OFF); + + rc = controller_reset_failed(h->cfgtable); + if (rc) + dev_info(&h->pdev->dev, + "Soft reset appears to have failed.\n"); + + /* since the controller's reset, we have to go back and re-init + * everything. Easiest to just forget what we've done and do it + * all over again. + */ + cciss_undo_allocations_after_kdump_soft_reset(h); + try_soft_reset = 0; + if (rc) + /* don't go to clean4, we already unallocated */ + return -ENODEV; + + goto reinit_after_soft_reset; + } + cciss_scsi_setup(h); /* Turn the interrupts on so we can service requests */ -- cgit v1.2.3 From 93c46c2fa7cfb272c3014327830d6cb30d8486a4 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:53:57 -0500 Subject: cciss: remove superfluous sleeps around reset code Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 23b0ba49300a..7990b98d4f73 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -4477,7 +4477,6 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev, */ dev_info(&pdev->dev, "using doorbell to reset controller\n"); writel(use_doorbell, vaddr + SA5_DOORBELL); - msleep(1000); } else { /* Try to do it the PCI power state way */ /* Quoting from the Open CISS Specification: "The Power @@ -4508,8 +4507,6 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev, pmcsr &= ~PCI_PM_CTRL_STATE_MASK; pmcsr |= PCI_D0; pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr); - - msleep(500); } return 0; } -- cgit v1.2.3 From ec52d5f1cb9a1a0db02143fdcc6004749ea19e0b Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:54:02 -0500 Subject: cciss: do not attempt PCI power management reset method if we know it won't work. Just go straight to the soft-reset method instead. Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 7990b98d4f73..865484e40841 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -558,7 +558,7 @@ static void __devinit cciss_procinit(ctlr_info_t *h) #define to_hba(n) container_of(n, struct ctlr_info, dev) #define to_drv(n) container_of(n, drive_info_struct, dev) -/* List of controllers which cannot be reset on kexec with reset_devices */ +/* List of controllers which cannot be hard reset on kexec with reset_devices */ static u32 unresettable_controller[] = { 0x324a103C, /* Smart Array P712m */ 0x324b103C, /* SmartArray P711m */ @@ -576,23 +576,45 @@ static u32 unresettable_controller[] = { 0x409D0E11, /* Smart Array 6400 EM */ }; -static int ctlr_is_resettable(struct ctlr_info *h) +/* List of controllers which cannot even be soft reset */ +static u32 soft_unresettable_controller[] = { + 0x409C0E11, /* Smart Array 6400 */ + 0x409D0E11, /* Smart Array 6400 EM */ +}; + +static int ctlr_is_hard_resettable(u32 board_id) { int i; for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++) - if (unresettable_controller[i] == h->board_id) + if (unresettable_controller[i] == board_id) return 0; return 1; } +static int ctlr_is_soft_resettable(u32 board_id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(soft_unresettable_controller); i++) + if (soft_unresettable_controller[i] == board_id) + return 0; + return 1; +} + +static int ctlr_is_resettable(u32 board_id) +{ + return ctlr_is_hard_resettable(board_id) || + ctlr_is_soft_resettable(board_id); +} + static ssize_t host_show_resettable(struct device *dev, struct device_attribute *attr, char *buf) { struct ctlr_info *h = to_hba(dev); - return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h)); + return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h->board_id)); } static DEVICE_ATTR(resettable, S_IRUGO, host_show_resettable, NULL); @@ -4601,12 +4623,16 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) * likely not be happy. Just forbid resetting this conjoined mess. */ cciss_lookup_board_id(pdev, &board_id); - if (board_id == 0x409C0E11 || board_id == 0x409D0E11) { + if (!ctlr_is_resettable(board_id)) { dev_warn(&pdev->dev, "Cannot reset Smart Array 640x " "due to shared cache module."); return -ENODEV; } + /* if controller is soft- but not hard resettable... */ + if (!ctlr_is_hard_resettable(board_id)) + return -ENOTSUPP; /* try soft reset later. */ + /* Save the PCI command register */ pci_read_config_word(pdev, 4, &command_register); /* Turn the board off. This is so that later pci_restore_state() -- cgit v1.2.3 From 063d2cf72ab6101d2dd69bd6fb503b229be70325 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:54:07 -0500 Subject: cciss: do not use bit 2 doorbell reset It causes NMIs which are undesirable at best, unsurvivable at worst. Prefer the soft reset instead. Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 865484e40841..9528e94dc8b2 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -4673,8 +4673,14 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) use_doorbell = DOORBELL_CTLR_RESET2; } else { use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET; - if (use_doorbell) - use_doorbell = DOORBELL_CTLR_RESET; + if (use_doorbell) { + dev_warn(&pdev->dev, "Controller claims that " + "'Bit 2 doorbell reset' is " + "supported, but not 'bit 5 doorbell reset'. " + "Firmware update is recommended.\n"); + rc = -ENOTSUPP; /* use the soft reset */ + goto unmap_cfgtable; + } } rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell); -- cgit v1.2.3 From 8a4ec67bd5648beb09d7db988a75835b740e950d Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Tue, 3 May 2011 14:54:12 -0500 Subject: cciss: add cciss_tape_cmds module paramter This is to allow number of commands reserved for use by SCSI tape drives and medium changers to be adjusted at driver load time via the kernel parameter cciss_tape_cmds, with a default value of 6, and a range of 2 - 16 inclusive. Previously, the driver limited the number of commands which could be queued to the SCSI half of the the driver to only 2. This is to fix the problem that if you had more than two tape drives, you couldn't, for example, erase or rewind them all at the same time. Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 11 ++++++++++- drivers/block/cciss_scsi.c | 41 +++++++++++++++++++++++------------------ drivers/block/cciss_scsi.h | 4 ---- 3 files changed, 33 insertions(+), 23 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 9528e94dc8b2..abe90c973540 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -64,6 +64,10 @@ MODULE_DESCRIPTION("Driver for HP Smart Array Controllers"); MODULE_SUPPORTED_DEVICE("HP Smart Array Controllers"); MODULE_VERSION("3.6.26"); MODULE_LICENSE("GPL"); +static int cciss_tape_cmds = 6; +module_param(cciss_tape_cmds, int, 0644); +MODULE_PARM_DESC(cciss_tape_cmds, + "number of commands to allocate for tape devices (default: 6)"); static DEFINE_MUTEX(cciss_mutex); static struct proc_dir_entry *proc_cciss; @@ -4221,7 +4225,7 @@ static void __devinit cciss_get_max_perf_mode_cmds(struct ctlr_info *h) static void __devinit cciss_find_board_params(ctlr_info_t *h) { cciss_get_max_perf_mode_cmds(h); - h->nr_cmds = h->max_commands - 4; /* Allow room for some ioctls */ + h->nr_cmds = h->max_commands - 4 - cciss_tape_cmds; h->maxsgentries = readl(&(h->cfgtable->MaxSGElements)); /* * Limit in-command s/g elements to 32 save dma'able memory. @@ -4959,6 +4963,11 @@ reinit_after_soft_reset: sprintf(h->devname, "cciss%d", i); h->ctlr = i; + if (cciss_tape_cmds < 2) + cciss_tape_cmds = 2; + if (cciss_tape_cmds > 16) + cciss_tape_cmds = 16; + init_completion(&h->scan_wait); if (cciss_create_hba_sysfs_entry(h)) diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index df793803f5ae..696100241a6f 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -84,7 +84,6 @@ static struct scsi_host_template cciss_driver_template = { .proc_name = "cciss", .proc_info = cciss_scsi_proc_info, .queuecommand = cciss_scsi_queue_command, - .can_queue = SCSI_CCISS_CAN_QUEUE, .this_id = 7, .cmd_per_lun = 1, .use_clustering = DISABLE_CLUSTERING, @@ -108,16 +107,13 @@ struct cciss_scsi_cmd_stack_elem_t { #pragma pack() -#define CMD_STACK_SIZE (SCSI_CCISS_CAN_QUEUE * \ - CCISS_MAX_SCSI_DEVS_PER_HBA + 2) - // plus two for init time usage - #pragma pack(1) struct cciss_scsi_cmd_stack_t { struct cciss_scsi_cmd_stack_elem_t *pool; - struct cciss_scsi_cmd_stack_elem_t *elem[CMD_STACK_SIZE]; + struct cciss_scsi_cmd_stack_elem_t **elem; dma_addr_t cmd_pool_handle; int top; + int nelems; }; #pragma pack() @@ -191,7 +187,7 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c) sa = h->scsi_ctlr; stk = &sa->cmd_stack; stk->top++; - if (stk->top >= CMD_STACK_SIZE) { + if (stk->top >= stk->nelems) { dev_err(&h->pdev->dev, "scsi_cmd_free called too many times.\n"); BUG(); @@ -206,13 +202,14 @@ scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa) struct cciss_scsi_cmd_stack_t *stk; size_t size; + stk = &sa->cmd_stack; + stk->nelems = cciss_tape_cmds + 2; sa->cmd_sg_list = cciss_allocate_sg_chain_blocks(h, - h->chainsize, CMD_STACK_SIZE); + h->chainsize, stk->nelems); if (!sa->cmd_sg_list && h->chainsize > 0) return -ENOMEM; - stk = &sa->cmd_stack; - size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * CMD_STACK_SIZE; + size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * stk->nelems; /* Check alignment, see cciss_cmd.h near CommandList_struct def. */ BUILD_BUG_ON((sizeof(*stk->pool) % COMMANDLIST_ALIGNMENT) != 0); @@ -221,18 +218,23 @@ scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa) pci_alloc_consistent(h->pdev, size, &stk->cmd_pool_handle); if (stk->pool == NULL) { - cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE); + cciss_free_sg_chain_blocks(sa->cmd_sg_list, stk->nelems); sa->cmd_sg_list = NULL; return -ENOMEM; } - - for (i=0; ielem = kmalloc(sizeof(stk->elem[0]) * stk->nelems, GFP_KERNEL); + if (!stk->elem) { + pci_free_consistent(h->pdev, size, stk->pool, + stk->cmd_pool_handle); + return -1; + } + for (i = 0; i < stk->nelems; i++) { stk->elem[i] = &stk->pool[i]; stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle + (sizeof(struct cciss_scsi_cmd_stack_elem_t) * i)); stk->elem[i]->cmdindex = i; } - stk->top = CMD_STACK_SIZE-1; + stk->top = stk->nelems-1; return 0; } @@ -245,16 +247,18 @@ scsi_cmd_stack_free(ctlr_info_t *h) sa = h->scsi_ctlr; stk = &sa->cmd_stack; - if (stk->top != CMD_STACK_SIZE-1) { + if (stk->top != stk->nelems-1) { dev_warn(&h->pdev->dev, "bug: %d scsi commands are still outstanding.\n", - CMD_STACK_SIZE - stk->top); + stk->nelems - stk->top); } - size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * CMD_STACK_SIZE; + size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * stk->nelems; pci_free_consistent(h->pdev, size, stk->pool, stk->cmd_pool_handle); stk->pool = NULL; - cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE); + cciss_free_sg_chain_blocks(sa->cmd_sg_list, stk->nelems); + kfree(stk->elem); + stk->elem = NULL; } #if 0 @@ -859,6 +863,7 @@ cciss_scsi_detect(ctlr_info_t *h) sh->io_port = 0; // good enough? FIXME, sh->n_io_port = 0; // I don't think we use these two... sh->this_id = SELF_SCSI_ID; + sh->can_queue = cciss_tape_cmds; sh->sg_tablesize = h->maxsgentries; sh->max_cmd_len = MAX_COMMAND_SIZE; diff --git a/drivers/block/cciss_scsi.h b/drivers/block/cciss_scsi.h index 6d5822fe851a..e71d986727ca 100644 --- a/drivers/block/cciss_scsi.h +++ b/drivers/block/cciss_scsi.h @@ -36,13 +36,9 @@ addressible natively, and may in fact turn out to be not scsi at all. */ -#define SCSI_CCISS_CAN_QUEUE 2 /* -Note, cmd_per_lun could give us some trouble, so I'm setting it very low. -Likewise, SCSI_CCISS_CAN_QUEUE is set very conservatively. - If the upper scsi layer tries to track how many commands we have outstanding, it will be operating under the misapprehension that it is the only one sending us requests. We also have the block interface, -- cgit v1.2.3 From edc83d47a9928281ecf6fb709753edb3c58ae7f1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 6 May 2011 08:27:00 -0600 Subject: cciss: fix compile issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/block/cciss.c: In function ‘cciss_send_reset’: drivers/block/cciss.c:2515:2: error: implicit declaration of function ‘fill_cmd’ drivers/block/cciss.c: At top level: drivers/block/cciss.c:2531:12: error: conflicting types for ‘fill_cmd’ drivers/block/cciss.c:2534:1: note: an argument type that has a default promotion can’t match an empty parameter name list declaration drivers/block/cciss.c:2515:18: note: previous implicit declaration of ‘fill_cmd’ was here make[1]: *** [drivers/block/cciss.o] Error 1 make: *** [drivers/block/cciss.o] Error 2 Move fill_cmd() to above where it is first used. Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index abe90c973540..8f4ef656a1af 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2503,31 +2503,6 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, return 0; } -static int __devinit cciss_send_reset(ctlr_info_t *h, unsigned char *scsi3addr, - u8 reset_type) -{ - CommandList_struct *c; - int return_status; - - c = cmd_alloc(h); - if (!c) - return -ENOMEM; - return_status = fill_cmd(h, c, CCISS_RESET_MSG, NULL, 0, 0, - CTLR_LUNID, TYPE_MSG); - c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */ - if (return_status != IO_OK) { - cmd_special_free(h, c); - return return_status; - } - c->waiting = NULL; - enqueue_cmd_and_start_io(h, c); - /* Don't wait for completion, the reset won't complete. Don't free - * the command either. This is the last command we will send before - * re-initializing everything, so it doesn't matter and won't leak. - */ - return 0; -} - static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff, size_t size, __u8 page_code, unsigned char *scsi3addr, int cmd_type) @@ -2668,6 +2643,31 @@ static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff, return status; } +static int __devinit cciss_send_reset(ctlr_info_t *h, unsigned char *scsi3addr, + u8 reset_type) +{ + CommandList_struct *c; + int return_status; + + c = cmd_alloc(h); + if (!c) + return -ENOMEM; + return_status = fill_cmd(h, c, CCISS_RESET_MSG, NULL, 0, 0, + CTLR_LUNID, TYPE_MSG); + c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */ + if (return_status != IO_OK) { + cmd_special_free(h, c); + return return_status; + } + c->waiting = NULL; + enqueue_cmd_and_start_io(h, c); + /* Don't wait for completion, the reset won't complete. Don't free + * the command either. This is the last command we will send before + * re-initializing everything, so it doesn't matter and won't leak. + */ + return 0; +} + static int check_target_status(ctlr_info_t *h, CommandList_struct *c) { switch (c->err_info->ScsiStatus) { -- cgit v1.2.3 From 01f37f2d53e14a05b7fc3601d182f31ac3b35847 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 11 May 2011 15:57:09 -0400 Subject: xen/blkback: Fixed up comments and converted spaces to tabs. Suggested-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 70 +++++++++++++++++++-------------- drivers/block/xen-blkback/common.h | 77 ++++++++++++++++++++----------------- drivers/block/xen-blkback/xenbus.c | 39 +++++++++++-------- 3 files changed, 105 insertions(+), 81 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index b9bdd9e43ab9..6808cc7d9c73 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -73,13 +73,13 @@ module_param(debug_lvl, int, 0644); * response queued for it, with the saved 'id' passed back. */ struct pending_req { - struct blkif_st *blkif; - u64 id; - int nr_pages; - atomic_t pendcnt; - unsigned short operation; - int status; - struct list_head free_list; + struct blkif_st *blkif; + u64 id; + int nr_pages; + atomic_t pendcnt; + unsigned short operation; + int status; + struct list_head free_list; }; #define BLKBACK_INVALID_HANDLE (~0) @@ -103,7 +103,8 @@ static struct xen_blkbk *blkbk; * Little helpful macro to figure out the index and virtual address of the * pending_pages[..]. For each 'pending_req' we have have up to * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through - * 10 and would index in the pending_pages[..]. */ + * 10 and would index in the pending_pages[..]. + */ static inline int vaddr_pagenr(struct pending_req *req, int seg) { return (req - blkbk->pending_reqs) * @@ -167,8 +168,6 @@ static void free_req(struct pending_req *req) /* * Routines for managing virtual block devices (vbds). */ - - static int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) { @@ -315,7 +314,7 @@ struct seg_buf { /* * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. -*/ + */ static void xen_blkbk_unmap(struct pending_req *req) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; @@ -336,27 +335,32 @@ static void xen_blkbk_unmap(struct pending_req *req) ret = HYPERVISOR_grant_table_op( GNTTABOP_unmap_grant_ref, unmap, invcount); BUG_ON(ret); - /* Note, we use invcount, so nr->pages, so we can't index + /* + * Note, we use invcount, so nr->pages, so we can't index * using vaddr(req, i). */ for (i = 0; i < invcount; i++) { ret = m2p_remove_override( virt_to_page(unmap[i].host_addr), false); if (ret) { - printk(KERN_ALERT "Failed to remove M2P override for " \ - "%lx\n", (unsigned long)unmap[i].host_addr); + printk(KERN_ALERT "Failed to remove M2P override for %lx\n", + (unsigned long)unmap[i].host_addr); continue; } } } -static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_req, + +static int xen_blkbk_map(struct blkif_request *req, + struct pending_req *pending_req, struct seg_buf seg[]) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int i; int nseg = req->nr_segments; int ret = 0; - /* Fill out preq.nr_sects with proper amount of sectors, and setup + + /* + * Fill out preq.nr_sects with proper amount of sectors, and setup * assign map[..] with the PFN of the page in our domain with the * corresponding grant reference for each page. */ @@ -367,13 +371,15 @@ static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_ if (pending_req->operation != BLKIF_OP_READ) flags |= GNTMAP_readonly; gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, - req->u.rw.seg[i].gref, pending_req->blkif->domid); + req->u.rw.seg[i].gref, + pending_req->blkif->domid); } ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); BUG_ON(ret); - /* Now swizzel the MFN in our domain with the MFN from the other domain + /* + * Now swizzle the MFN in our domain with the MFN from the other domain * so that when we access vaddr(pending_req,i) it has the contents of * the page from the other domain. */ @@ -423,7 +429,8 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) pending_req->status = BLKIF_RSP_ERROR; } - /* If all of the bio's have completed it is time to unmap + /* + * If all of the bio's have completed it is time to unmap * the grant references associated with 'request' and provide * the proper response on the ring. */ @@ -510,8 +517,8 @@ static int do_block_io_op(struct blkif_st *blkif) } /* - * Transumation of the 'struct blkif_request' to a proper 'struct bio' - * and call the 'submit_bio' to pass it to the underlaying storage. + * Transmutation of the 'struct blkif_request' to a proper 'struct bio' + * and call the 'submit_bio' to pass it to the underlying storage. */ static int dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, @@ -538,8 +545,10 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, case BLKIF_OP_FLUSH_DISKCACHE: blkif->st_f_req++; operation = WRITE_FLUSH; - /* The frontend likes to set this to -1, which vbd_translate - * is alergic too. */ + /* + * The frontend likes to set this to -1, which vbd_translate + * is alergic too. + */ req->u.rw.sector_number = 0; break; case BLKIF_OP_WRITE_BARRIER: @@ -585,8 +594,11 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, preq.sector_number + preq.nr_sects, preq.dev); goto fail_response; } - /* This check _MUST_ be done after vbd_translate as the preq.bdev - * is set there. */ + + /* + * This check _MUST_ be done after vbd_translate as the preq.bdev + * is set there. + */ for (i = 0; i < nseg; i++) { if (((int)preq.sector_number|(int)seg[i].nsec) & ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { @@ -595,7 +607,9 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, goto fail_response; } } - /* If we have failed at this point, we need to undo the M2P override, + + /* + * If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. @@ -638,8 +652,8 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, bio->bi_end_io = end_block_io_op; } - - /* We set it one so that the last submit_bio does not have to call + /* + * We set it one so that the last submit_bio does not have to call * atomic_inc. */ atomic_set(&pending_req->pendcnt, nbio); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index af93837e1295..e37dcf7f6b8e 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -47,53 +47,58 @@ __FILE__ , __LINE__ , ## _a) struct vbd { - blkif_vdev_t handle; /* what the domain refers to this vbd as */ - unsigned char readonly; /* Non-zero -> read-only */ - unsigned char type; /* VDISK_xxx */ - u32 pdevice; /* phys device that this vbd maps to */ - struct block_device *bdev; - sector_t size; /* Cached size parameter */ - bool flush_support; + /* What the domain refers to this vbd as. */ + blkif_vdev_t handle; + /* Non-zero -> read-only */ + unsigned char readonly; + /* VDISK_xxx */ + unsigned char type; + /* phys device that this vbd maps to. */ + u32 pdevice; + struct block_device *bdev; + /* Cached size parameter. */ + sector_t size; + bool flush_support; }; struct backend_info; struct blkif_st { /* Unique identifier for this interface. */ - domid_t domid; - unsigned int handle; + domid_t domid; + unsigned int handle; /* Physical parameters of the comms window. */ - unsigned int irq; + unsigned int irq; /* Comms information. */ - enum blkif_protocol blk_protocol; - union blkif_back_rings blk_rings; - struct vm_struct *blk_ring_area; + enum blkif_protocol blk_protocol; + union blkif_back_rings blk_rings; + struct vm_struct *blk_ring_area; /* The VBD attached to this interface. */ - struct vbd vbd; + struct vbd vbd; /* Back pointer to the backend_info. */ - struct backend_info *be; + struct backend_info *be; /* Private fields. */ - spinlock_t blk_ring_lock; - atomic_t refcnt; + spinlock_t blk_ring_lock; + atomic_t refcnt; - wait_queue_head_t wq; + wait_queue_head_t wq; /* One thread per one blkif. */ - struct task_struct *xenblkd; - unsigned int waiting_reqs; + struct task_struct *xenblkd; + unsigned int waiting_reqs; /* statistics */ - unsigned long st_print; - int st_rd_req; - int st_wr_req; - int st_oo_req; - int st_f_req; - int st_rd_sect; - int st_wr_sect; - - wait_queue_head_t waiting_to_free; - - grant_handle_t shmem_handle; - grant_ref_t shmem_ref; + unsigned long st_print; + int st_rd_req; + int st_wr_req; + int st_oo_req; + int st_f_req; + int st_rd_sect; + int st_wr_sect; + + wait_queue_head_t waiting_to_free; + + grant_handle_t shmem_handle; + grant_ref_t shmem_ref; }; @@ -109,10 +114,10 @@ struct blkif_st { } while (0) struct phys_req { - unsigned short dev; - unsigned short nr_sects; - struct block_device *bdev; - blkif_sector_t sector_number; + unsigned short dev; + unsigned short nr_sects; + struct block_device *bdev; + blkif_sector_t sector_number; }; int xen_blkif_interface_init(void); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 9adcf806f83f..0cda406b4edb 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -27,12 +27,12 @@ __func__, __LINE__, ##args) struct backend_info { - struct xenbus_device *dev; - struct blkif_st *blkif; - struct xenbus_watch backend_watch; - unsigned major; - unsigned minor; - char *mode; + struct xenbus_device *dev; + struct blkif_st *blkif; + struct xenbus_watch backend_watch; + unsigned major; + unsigned minor; + char *mode; }; static struct kmem_cache *xen_blkif_cachep; @@ -425,7 +425,7 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, return err; } -/** +/* * Entry point to this code when a new device is created. Allocate the basic * structures, and watch the store waiting for the hotplug scripts to tell us * the device's physical major and minor numbers. Switch to InitWait. @@ -473,7 +473,7 @@ fail: } -/** +/* * Callback received when the hotplug scripts have placed the physical-device * node. Read it and the mode node, and create a vbd. If the frontend is * ready, connect. @@ -495,9 +495,11 @@ static void backend_changed(struct xenbus_watch *watch, err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", &major, &minor); if (XENBUS_EXIST_ERR(err)) { - /* Since this watch will fire once immediately after it is - registered, we expect this. Ignore it, and wait for the - hotplug scripts. */ + /* + * Since this watch will fire once immediately after it is + * registered, we expect this. Ignore it, and wait for the + * hotplug scripts. + */ return; } if (err != 2) { @@ -562,7 +564,7 @@ static void backend_changed(struct xenbus_watch *watch, } -/** +/* * Callback received when the frontend's state changes. */ static void frontend_changed(struct xenbus_device *dev, @@ -584,13 +586,16 @@ static void frontend_changed(struct xenbus_device *dev, case XenbusStateInitialised: case XenbusStateConnected: - /* Ensure we connect even when two watches fire in - close successsion and we miss the intermediate value - of frontend_state. */ + /* + * Ensure we connect even when two watches fire in + * close successsion and we miss the intermediate value + * of frontend_state. + */ if (dev->state == XenbusStateConnected) break; - /* Enforce precondition before potential leak point. + /* + * Enforce precondition before potential leak point. * blkif_disconnect() is idempotent. */ xen_blkif_disconnect(be->blkif); @@ -627,7 +632,7 @@ static void frontend_changed(struct xenbus_device *dev, /* ** Connection ** */ -/** +/* * Write the physical details regarding the block device to the store, and * switch to Connected state. */ -- cgit v1.2.3 From 4352b47ab7918108b389a48d2163c9a4c2aaf139 Mon Sep 17 00:00:00 2001 From: Marek Marczykowski Date: Tue, 3 May 2011 12:04:52 -0400 Subject: xen-blkfront: fix data size for xenbus_gather in blkfront_connect barrier variable is int, not long. This overflow caused another variable override: "err" (in PV code) and "binfo" (in xenlinux code - drivers/xen/blkfront/blkfront.c). The later caused incorrect device flags (RO/removable etc). Signed-off-by: Marek Marczykowski Acked-by: Ian Campbell [v1: Changed title] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 9cb8668ff5f4..20759817ad57 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1141,7 +1141,7 @@ static void blkfront_connect(struct blkfront_info *info) } err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-barrier", "%lu", &barrier, + "feature-barrier", "%d", &barrier, NULL); /* -- cgit v1.2.3 From edf6ef59ec7ee82cadc93528229c4097b6c92a7e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 3 May 2011 12:01:11 -0400 Subject: xen-blkfront: Introduce BLKIF_OP_FLUSH_DISKCACHE support. If the backend supports the 'feature-flush-cache' mode, use that instead of the 'feature-barrier' support. Currently there are three backends that support the 'feature-flush-cache' mode: NetBSD, Solaris and Linux kernel. The 'flush' option is much light-weight version than the 'barrier' support so lets try to use as there are no filesystems in the kernel that use full barriers anymore. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 49 ++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 13 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 20759817ad57..b536a9cef917 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -97,6 +97,7 @@ struct blkfront_info struct blk_shadow shadow[BLK_RING_SIZE]; unsigned long shadow_free; unsigned int feature_flush; + unsigned int flush_op; int is_ready; }; @@ -250,8 +251,7 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode, /* * Generate a Xen blkfront IO request from a blk layer request. Reads - * and writes are handled as expected. Since we lack a loose flush - * request, we map flushes into a full ordered barrier. + * and writes are handled as expected. * * @req: a request struct */ @@ -293,14 +293,13 @@ static int blkif_queue_request(struct request *req) if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { /* - * Ideally we could just do an unordered - * flush-to-disk, but all we have is a full write - * barrier at the moment. However, a barrier write is + * Ideally we can do an unordered flush-to-disk. In case the + * backend onlysupports barriers, use that. A barrier request * a superset of FUA, so we can implement it the same * way. (It's also a FLUSH+FUA, since it is * guaranteed ordered WRT previous writes.) */ - ring_req->operation = BLKIF_OP_WRITE_BARRIER; + ring_req->operation = info->flush_op; } ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); @@ -433,8 +432,11 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) static void xlvbd_flush(struct blkfront_info *info) { blk_queue_flush(info->rq, info->feature_flush); - printk(KERN_INFO "blkfront: %s: barriers %s\n", + printk(KERN_INFO "blkfront: %s: %s: %s\n", info->gd->disk_name, + info->flush_op == BLKIF_OP_WRITE_BARRIER ? + "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ? + "flush diskcache" : "barrier or flush"), info->feature_flush ? "enabled" : "disabled"); } @@ -720,15 +722,20 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; switch (bret->operation) { + case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { - printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", + printk(KERN_WARNING "blkfront: %s: write %s op failed\n", + info->flush_op == BLKIF_OP_WRITE_BARRIER ? + "barrier" : "flush disk cache", info->gd->disk_name); error = -EOPNOTSUPP; } if (unlikely(bret->status == BLKIF_RSP_ERROR && info->shadow[id].req.nr_segments == 0)) { - printk(KERN_WARNING "blkfront: %s: empty write barrier op failed\n", + printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n", + info->flush_op == BLKIF_OP_WRITE_BARRIER ? + "barrier" : "flush disk cache", info->gd->disk_name); error = -EOPNOTSUPP; } @@ -736,6 +743,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) if (error == -EOPNOTSUPP) error = 0; info->feature_flush = 0; + info->flush_op = 0; xlvbd_flush(info); } /* fall through */ @@ -1100,7 +1108,7 @@ static void blkfront_connect(struct blkfront_info *info) unsigned long sector_size; unsigned int binfo; int err; - int barrier; + int barrier, flush; switch (info->connected) { case BLKIF_STATE_CONNECTED: @@ -1140,6 +1148,9 @@ static void blkfront_connect(struct blkfront_info *info) return; } + info->feature_flush = 0; + info->flush_op = 0; + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, "feature-barrier", "%d", &barrier, NULL); @@ -1151,11 +1162,23 @@ static void blkfront_connect(struct blkfront_info *info) * * If there are barriers, then we use flush. */ - info->feature_flush = 0; - - if (!err && barrier) + if (!err && barrier) { info->feature_flush = REQ_FLUSH | REQ_FUA; + info->flush_op = BLKIF_OP_WRITE_BARRIER; + } + /* + * And if there is "feature-flush-cache" use that above + * barriers. + */ + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "feature-flush-cache", "%d", &flush, + NULL); + if (!err && flush) { + info->feature_flush = REQ_FLUSH; + info->flush_op = BLKIF_OP_FLUSH_DISKCACHE; + } + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", -- cgit v1.2.3 From ebe8190659244ec21b5f16950cf7b156f5b7eb01 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:42:31 -0400 Subject: xen/blkback: Change printk/DPRINTK to pr_.. type variant. And also make them uniform and prefix the message with 'xen-blkback'. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 58 ++++++++++++++++++------------------- drivers/block/xen-blkback/xenbus.c | 19 +++++------- 2 files changed, 37 insertions(+), 40 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 6808cc7d9c73..5c9568e39eab 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -196,20 +196,20 @@ static void vbd_resize(struct blkif_st *blkif) struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be); unsigned long long new_size = vbd_sz(vbd); - printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", + pr_info("xen-blkback: VBD Resize: Domid: %d, Device: (%d, %d)\n", blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); - printk(KERN_INFO "VBD Resize: new size %llu\n", new_size); + pr_info("xen-blkback: VBD Resize: new size %llu\n", new_size); vbd->size = new_size; again: err = xenbus_transaction_start(&xbt); if (err) { - printk(KERN_WARNING "Error starting transaction"); + pr_warn("xen-blkback: Error starting transaction"); return; } err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(vbd)); if (err) { - printk(KERN_WARNING "Error writing new size"); + pr_warn("xen-blkback: Error writing new size"); goto abort; } /* @@ -219,7 +219,7 @@ again: */ err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); if (err) { - printk(KERN_WARNING "Error writing the state"); + pr_warn("xen-blkback: Error writing the state"); goto abort; } @@ -227,7 +227,7 @@ again: if (err == -EAGAIN) goto again; if (err) - printk(KERN_WARNING "Error ending transaction"); + pr_warn("xen-blkback: Error ending transaction"); abort: xenbus_transaction_end(xbt, 1); } @@ -253,9 +253,9 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct blkif_st *blkif) { - printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | f %4d\n", - current->comm, blkif->st_oo_req, - blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req); + pr_debug("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n", + current->comm, blkif->st_oo_req, + blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req); blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); blkif->st_rd_req = 0; blkif->st_wr_req = 0; @@ -270,7 +270,7 @@ int xen_blkif_schedule(void *arg) xen_blkif_get(blkif); if (debug_lvl) - printk(KERN_DEBUG "%s: started\n", current->comm); + pr_debug("xen-blkback: %s: started\n", current->comm); while (!kthread_should_stop()) { if (try_to_freeze()) @@ -299,7 +299,7 @@ int xen_blkif_schedule(void *arg) if (log_stats) print_stats(blkif); if (debug_lvl) - printk(KERN_DEBUG "%s: exiting\n", current->comm); + pr_debug("xen-blkback: %s: exiting\n", current->comm); blkif->xenblkd = NULL; xen_blkif_put(blkif); @@ -343,8 +343,8 @@ static void xen_blkbk_unmap(struct pending_req *req) ret = m2p_remove_override( virt_to_page(unmap[i].host_addr), false); if (ret) { - printk(KERN_ALERT "Failed to remove M2P override for %lx\n", - (unsigned long)unmap[i].host_addr); + pr_alert("xen-blkback: Failed to remove M2P override for %lx\n", + (unsigned long)unmap[i].host_addr); continue; } } @@ -385,7 +385,7 @@ static int xen_blkbk_map(struct blkif_request *req, */ for (i = 0; i < nseg; i++) { if (unlikely(map[i].status != 0)) { - DPRINTK("invalid buffer -- could not remap it\n"); + pr_debug("xen-blkback: invalid buffer -- could not remap it\n"); map[i].handle = BLKBACK_INVALID_HANDLE; ret |= 1; } @@ -398,9 +398,8 @@ static int xen_blkbk_map(struct blkif_request *req, ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), blkbk->pending_page(pending_req, i), false); if (ret) { - printk(KERN_ALERT "Failed to install M2P override for"\ - " %lx (ret: %d)\n", (unsigned long) - map[i].dev_bus_addr, ret); + pr_alert("xen-blkback: Failed to install M2P override for %lx (ret: %d)\n", + (unsigned long)map[i].dev_bus_addr, ret); /* We could switch over to GNTTABOP_copy */ continue; } @@ -420,12 +419,12 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) /* An error fails the entire request. */ if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && (error == -EOPNOTSUPP)) { - DPRINTK("blkback: flush diskcache op failed, not supported\n"); + pr_debug("xen-blkback: flush diskcache op failed, not supported\n"); xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if (error) { - DPRINTK("Buffer not up-to-date at end of operation, " - "error=%d\n", error); + pr_debug("xen-blkback: Buffer not up-to-date at end of operation," + " error=%d\n", error); pending_req->status = BLKIF_RSP_ERROR; } @@ -562,7 +561,8 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, nseg = req->nr_segments; if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { - DPRINTK("Bad number of segments in request (%d)\n", nseg); + pr_debug("xen-blkback: Bad number of segments in request (%d)\n", + nseg); /* Haven't submitted any bio's yet. */ goto fail_response; } @@ -588,10 +588,10 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, } if (vbd_translate(&preq, blkif, operation) != 0) { - DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", - operation == READ ? "read" : "write", - preq.sector_number, - preq.sector_number + preq.nr_sects, preq.dev); + pr_debug("xen-blkback: access denied: %s of [%llu,%llu] on dev=%04x\n", + operation == READ ? "read" : "write", + preq.sector_number, + preq.sector_number + preq.nr_sects, preq.dev); goto fail_response; } @@ -602,8 +602,8 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, for (i = 0; i < nseg; i++) { if (((int)preq.sector_number|(int)seg[i].nsec) & ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { - DPRINTK("Misaligned I/O request from domain %d", - blkif->domid); + pr_debug("xen-blkback: Misaligned I/O request from domain %d", + blkif->domid); goto fail_response; } } @@ -759,7 +759,7 @@ static int __init xen_blkif_init(void) blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); if (!blkbk) { - printk(KERN_ALERT "%s: out of memory!\n", __func__); + pr_alert("xen-blkback: %s: out of memory!\n", __func__); return -ENOMEM; } @@ -807,7 +807,7 @@ static int __init xen_blkif_init(void) return 0; out_of_memory: - printk(KERN_ERR "%s: out of memory\n", __func__); + pr_alert("xen-blkback: %s: out of memory\n", __func__); failed_init: kfree(blkbk->pending_reqs); kfree(blkbk->pending_grant_handles); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 0cda406b4edb..c86519c477f3 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -23,7 +23,7 @@ #undef DPRINTK #define DPRINTK(fmt, args...) \ - pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ + pr_debug("xen-blkback: (%s:%d) " fmt ".\n", \ __func__, __LINE__, ##args) struct backend_info { @@ -136,7 +136,7 @@ static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) BUG(); if (op.status) { - DPRINTK(" Grant table operation failure !\n"); + DPRINTK("Grant table operation failure !\n"); return op.status; } @@ -509,10 +509,8 @@ static void backend_changed(struct xenbus_watch *watch, if ((be->major || be->minor) && ((be->major != major) || (be->minor != minor))) { - printk(KERN_WARNING - "blkback: changing physical device (from %x:%x to " - "%x:%x) not supported.\n", be->major, be->minor, - major, minor); + pr_warn("xen-blkback: changing physical device (from %x:%x to %x:%x) not supported.\n", + be->major, be->minor, major, minor); return; } @@ -578,8 +576,8 @@ static void frontend_changed(struct xenbus_device *dev, switch (frontend_state) { case XenbusStateInitialising: if (dev->state == XenbusStateClosed) { - printk(KERN_INFO "%s: %s: prepare for reconnect\n", - __func__, dev->nodename); + pr_info("xen-blkback: %s: prepare for reconnect\n", + dev->nodename); xenbus_switch_state(dev, XenbusStateInitWait); } break; @@ -733,9 +731,8 @@ static int connect_ring(struct backend_info *be) xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -1; } - printk(KERN_INFO - "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", - ring_ref, evtchn, be->blkif->blk_protocol, protocol); + pr_info("xen-blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", + ring_ref, evtchn, be->blkif->blk_protocol, protocol); /* Map the shared frame, irq etc. */ err = xen_blkif_map(be->blkif, ring_ref, evtchn); -- cgit v1.2.3 From 1afbd730a33c6e4ca780a70351e8929dd4c40636 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 11 May 2011 16:15:24 -0400 Subject: xen/blkback: Make the DPRINTK uniform. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 6 +++--- drivers/block/xen-blkback/xenbus.c | 5 ----- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index e37dcf7f6b8e..46e5d0630440 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -42,9 +42,9 @@ #include #include -#define DPRINTK(_f, _a...) \ - pr_debug("(file=%s, line=%d) " _f, \ - __FILE__ , __LINE__ , ## _a) +#define DPRINTK(fmt, args...) \ + pr_debug("xen-blkback: (%s:%d) " fmt ".\n", \ + __func__, __LINE__, ##args) struct vbd { /* What the domain refers to this vbd as. */ diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index c86519c477f3..fa01dbbee0ad 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -21,11 +21,6 @@ #include #include "common.h" -#undef DPRINTK -#define DPRINTK(fmt, args...) \ - pr_debug("xen-blkback: (%s:%d) " fmt ".\n", \ - __func__, __LINE__, ##args) - struct backend_info { struct xenbus_device *dev; struct blkif_st *blkif; -- cgit v1.2.3 From 22b20f2dffd09edd66127f2022c26d0039bad88e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:43:12 -0400 Subject: xen/blkback: Use the DRV_PFX in the pr_.. macros. To make it easier to read. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 36 ++++++++++++++++++------------------ drivers/block/xen-blkback/common.h | 3 ++- drivers/block/xen-blkback/xenbus.c | 6 +++--- 3 files changed, 23 insertions(+), 22 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 5c9568e39eab..09fe528dd088 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -196,20 +196,20 @@ static void vbd_resize(struct blkif_st *blkif) struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be); unsigned long long new_size = vbd_sz(vbd); - pr_info("xen-blkback: VBD Resize: Domid: %d, Device: (%d, %d)\n", + pr_info(DRV_PFX "VBD Resize: Domid: %d, Device: (%d, %d)\n", blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); - pr_info("xen-blkback: VBD Resize: new size %llu\n", new_size); + pr_info(DRV_PFX "VBD Resize: new size %llu\n", new_size); vbd->size = new_size; again: err = xenbus_transaction_start(&xbt); if (err) { - pr_warn("xen-blkback: Error starting transaction"); + pr_warn(DRV_PFX "Error starting transaction"); return; } err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(vbd)); if (err) { - pr_warn("xen-blkback: Error writing new size"); + pr_warn(DRV_PFX "Error writing new size"); goto abort; } /* @@ -219,7 +219,7 @@ again: */ err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); if (err) { - pr_warn("xen-blkback: Error writing the state"); + pr_warn(DRV_PFX "Error writing the state"); goto abort; } @@ -227,7 +227,7 @@ again: if (err == -EAGAIN) goto again; if (err) - pr_warn("xen-blkback: Error ending transaction"); + pr_warn(DRV_PFX "Error ending transaction"); abort: xenbus_transaction_end(xbt, 1); } @@ -270,7 +270,7 @@ int xen_blkif_schedule(void *arg) xen_blkif_get(blkif); if (debug_lvl) - pr_debug("xen-blkback: %s: started\n", current->comm); + pr_debug(DRV_PFX "%s: started\n", current->comm); while (!kthread_should_stop()) { if (try_to_freeze()) @@ -299,7 +299,7 @@ int xen_blkif_schedule(void *arg) if (log_stats) print_stats(blkif); if (debug_lvl) - pr_debug("xen-blkback: %s: exiting\n", current->comm); + pr_debug(DRV_PFX "%s: exiting\n", current->comm); blkif->xenblkd = NULL; xen_blkif_put(blkif); @@ -343,7 +343,7 @@ static void xen_blkbk_unmap(struct pending_req *req) ret = m2p_remove_override( virt_to_page(unmap[i].host_addr), false); if (ret) { - pr_alert("xen-blkback: Failed to remove M2P override for %lx\n", + pr_alert(DRV_PFX "Failed to remove M2P override for %lx\n", (unsigned long)unmap[i].host_addr); continue; } @@ -385,7 +385,7 @@ static int xen_blkbk_map(struct blkif_request *req, */ for (i = 0; i < nseg; i++) { if (unlikely(map[i].status != 0)) { - pr_debug("xen-blkback: invalid buffer -- could not remap it\n"); + pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); map[i].handle = BLKBACK_INVALID_HANDLE; ret |= 1; } @@ -398,7 +398,7 @@ static int xen_blkbk_map(struct blkif_request *req, ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), blkbk->pending_page(pending_req, i), false); if (ret) { - pr_alert("xen-blkback: Failed to install M2P override for %lx (ret: %d)\n", + pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n", (unsigned long)map[i].dev_bus_addr, ret); /* We could switch over to GNTTABOP_copy */ continue; @@ -419,11 +419,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) /* An error fails the entire request. */ if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && (error == -EOPNOTSUPP)) { - pr_debug("xen-blkback: flush diskcache op failed, not supported\n"); + pr_debug(DRV_PFX "flush diskcache op failed, not supported\n"); xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if (error) { - pr_debug("xen-blkback: Buffer not up-to-date at end of operation," + pr_debug(DRV_PFX "Buffer not up-to-date at end of operation," " error=%d\n", error); pending_req->status = BLKIF_RSP_ERROR; } @@ -561,7 +561,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, nseg = req->nr_segments; if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { - pr_debug("xen-blkback: Bad number of segments in request (%d)\n", + pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", nseg); /* Haven't submitted any bio's yet. */ goto fail_response; @@ -588,7 +588,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, } if (vbd_translate(&preq, blkif, operation) != 0) { - pr_debug("xen-blkback: access denied: %s of [%llu,%llu] on dev=%04x\n", + pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n", operation == READ ? "read" : "write", preq.sector_number, preq.sector_number + preq.nr_sects, preq.dev); @@ -602,7 +602,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, for (i = 0; i < nseg; i++) { if (((int)preq.sector_number|(int)seg[i].nsec) & ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { - pr_debug("xen-blkback: Misaligned I/O request from domain %d", + pr_debug(DRV_PFX "Misaligned I/O request from domain %d", blkif->domid); goto fail_response; } @@ -759,7 +759,7 @@ static int __init xen_blkif_init(void) blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); if (!blkbk) { - pr_alert("xen-blkback: %s: out of memory!\n", __func__); + pr_alert(DRV_PFX "%s: out of memory!\n", __func__); return -ENOMEM; } @@ -807,7 +807,7 @@ static int __init xen_blkif_init(void) return 0; out_of_memory: - pr_alert("xen-blkback: %s: out of memory\n", __func__); + pr_alert(DRV_PFX "%s: out of memory\n", __func__); failed_init: kfree(blkbk->pending_reqs); kfree(blkbk->pending_grant_handles); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 46e5d0630440..da96e3eaa641 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -42,8 +42,9 @@ #include #include +#define DRV_PFX "xen-blkback:" #define DPRINTK(fmt, args...) \ - pr_debug("xen-blkback: (%s:%d) " fmt ".\n", \ + pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ __func__, __LINE__, ##args) struct vbd { diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index fa01dbbee0ad..1c3fa6507e6d 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -504,7 +504,7 @@ static void backend_changed(struct xenbus_watch *watch, if ((be->major || be->minor) && ((be->major != major) || (be->minor != minor))) { - pr_warn("xen-blkback: changing physical device (from %x:%x to %x:%x) not supported.\n", + pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", be->major, be->minor, major, minor); return; } @@ -571,7 +571,7 @@ static void frontend_changed(struct xenbus_device *dev, switch (frontend_state) { case XenbusStateInitialising: if (dev->state == XenbusStateClosed) { - pr_info("xen-blkback: %s: prepare for reconnect\n", + pr_info(DRV_PFX "%s: prepare for reconnect\n", dev->nodename); xenbus_switch_state(dev, XenbusStateInitWait); } @@ -726,7 +726,7 @@ static int connect_ring(struct backend_info *be) xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -1; } - pr_info("xen-blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", + pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", ring_ref, evtchn, be->blkif->blk_protocol, protocol); /* Map the shared frame, irq etc. */ -- cgit v1.2.3 From 72468bfcb815bc9875a870973469f68e20c78717 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 11 May 2011 16:21:08 -0400 Subject: xen/blkback: Removing the debug_lvl option. It is not really used for anything. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 09fe528dd088..453b51ac737f 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -62,9 +62,7 @@ MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); /* Run-time switchable: /sys/module/blkback/parameters/ */ static unsigned int log_stats; -static unsigned int debug_lvl; module_param(log_stats, int, 0644); -module_param(debug_lvl, int, 0644); /* * Each outstanding request that we've passed to the lower device layers has a @@ -269,9 +267,6 @@ int xen_blkif_schedule(void *arg) xen_blkif_get(blkif); - if (debug_lvl) - pr_debug(DRV_PFX "%s: started\n", current->comm); - while (!kthread_should_stop()) { if (try_to_freeze()) continue; @@ -298,8 +293,6 @@ int xen_blkif_schedule(void *arg) if (log_stats) print_stats(blkif); - if (debug_lvl) - pr_debug(DRV_PFX "%s: exiting\n", current->comm); blkif->xenblkd = NULL; xen_blkif_put(blkif); -- cgit v1.2.3 From 68c88dd7d3caf1737112238fbe91cccd8e7a69fc Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 11 May 2011 16:23:39 -0400 Subject: xen/blkback: Move blkif_get_x86_[32|64]_req to common.h in block/xen-blkback dir. From the blkif.h header, which was exposed to the frontend. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index da96e3eaa641..647d974da392 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -132,4 +132,36 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); +static void inline blkif_get_x86_32_req(struct blkif_request *dst, + struct blkif_x86_32_request *src) +{ + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + dst->operation = src->operation; + dst->nr_segments = src->nr_segments; + dst->handle = src->handle; + dst->id = src->id; + dst->u.rw.sector_number = src->sector_number; + barrier(); + if (n > dst->nr_segments) + n = dst->nr_segments; + for (i = 0; i < n; i++) + dst->u.rw.seg[i] = src->seg[i]; +} + +static void inline blkif_get_x86_64_req(struct blkif_request *dst, + struct blkif_x86_64_request *src) +{ + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + dst->operation = src->operation; + dst->nr_segments = src->nr_segments; + dst->handle = src->handle; + dst->id = src->id; + dst->u.rw.sector_number = src->sector_number; + barrier(); + if (n > dst->nr_segments) + n = dst->nr_segments; + for (i = 0; i < n; i++) + dst->u.rw.seg[i] = src->seg[i]; +} + #endif /* __BLKIF__BACKEND__COMMON_H__ */ -- cgit v1.2.3 From b9fc02968c5dd3c0461b4bb126499a17b13fb86e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 11 May 2011 16:26:59 -0400 Subject: xen/blkback: Fix spelling mistakes. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 1c3fa6507e6d..5d2bbf6240c8 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -351,7 +351,7 @@ static int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, FMODE_READ : FMODE_WRITE, NULL); if (IS_ERR(bdev)) { - DPRINTK("vbd_creat: device %08x could not be opened.\n", + DPRINTK("vbd_create: device %08x could not be opened.\n", vbd->pdevice); return -ENOENT; } @@ -360,7 +360,7 @@ static int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, vbd->size = vbd_sz(vbd); if (vbd->bdev->bd_disk == NULL) { - DPRINTK("vbd_creat: device %08x doesn't exist.\n", + DPRINTK("vbd_create: device %08x doesn't exist.\n", vbd->pdevice); vbd_free(vbd); return -ENOENT; -- cgit v1.2.3 From a4c348580e65c95d4b278bb6f154f622df12b893 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:10:55 -0400 Subject: xen/blkback: Flesh out the description in the Kconfig. with more details. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/Kconfig | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 9abb64689712..717d6e4e18d3 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -478,6 +478,19 @@ config XEN_BLKDEV_BACKEND block devices to other guests via a high-performance shared-memory interface. + The corresponding Linux frontend driver is enabled by the + CONFIG_XEN_BLKDEV_FRONTEND configuration option. + + The backend driver attaches itself to a any block device specified + in the XenBus configuration. There are no limits to what the block + device as long as it has a major and minor. + + If you are compiling a kernel to run in a Xen block backend driver + domain (often this is domain 0) you should say Y here. To + compile this driver as a module, chose M here: the module + will be called xen-blkback. + + config VIRTIO_BLK tristate "Virtio block driver (EXPERIMENTAL)" depends on EXPERIMENTAL && VIRTIO -- cgit v1.2.3 From 03e0edf946a08f498788bb6e8ab58453d98f25b9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:19:23 -0400 Subject: xen/blkback: Checkpatch.pl recommend against multiple assigments. CHECK: multiple assignments should be avoided Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 9 ++++++--- drivers/block/xen-blkback/xenbus.c | 6 ++++-- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 453b51ac737f..362fbf6f656d 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -620,10 +620,11 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, seg[i].nsec << 9, seg[i].buf & ~PAGE_MASK) == 0)) { - bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i); + bio = bio_alloc(GFP_KERNEL, nseg-i); if (unlikely(bio == NULL)) goto fail_put_bio; + biolist[nbio++] = bio; bio->bi_bdev = preq.bdev; bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; @@ -636,10 +637,12 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, /* This will be hit if the operation was a flush. */ if (!bio) { BUG_ON(operation != WRITE_FLUSH); - bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0); + + bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; + biolist[nbio++] = bio; bio->bi_bdev = preq.bdev; bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; @@ -677,7 +680,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, return -EIO; fail_put_bio: - for (i = 0; i < (nbio-1); i++) + for (i = 0; i < nbio; i++) bio_put(biolist[i]); __end_block_io_op(pending_req, -EINVAL); msleep(1); /* back off a bit */ diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 5d2bbf6240c8..ba8d30662d19 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -538,7 +538,8 @@ static void backend_changed(struct xenbus_watch *watch, err = vbd_create(be->blkif, handle, major, minor, (NULL == strchr(be->mode, 'w')), cdrom); if (err) { - be->major = be->minor = 0; + be->major = 0; + be->minor = 0; xenbus_dev_fatal(dev, err, "creating vbd structure"); return; } @@ -546,7 +547,8 @@ static void backend_changed(struct xenbus_watch *watch, err = xenvbd_sysfs_addif(dev); if (err) { vbd_free(&be->blkif->vbd); - be->major = be->minor = 0; + be->major = 0; + be->minor = 0; xenbus_dev_fatal(dev, err, "creating sysfs entries"); return; } -- cgit v1.2.3 From b0f801273f7359a7d91fc94f5c6bf216bc17aaa1 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:23:06 -0400 Subject: xen/blkback: Fixing some more of the cleanpatch.pl warnings. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 2 +- drivers/block/xen-blkback/common.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 362fbf6f656d..d06eb6a50d57 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -637,7 +637,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, /* This will be hit if the operation was a flush. */ if (!bio) { BUG_ON(operation != WRITE_FLUSH); - + bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 647d974da392..629546558a47 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -132,7 +132,7 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); -static void inline blkif_get_x86_32_req(struct blkif_request *dst, +static inline void blkif_get_x86_32_req(struct blkif_request *dst, struct blkif_x86_32_request *src) { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; @@ -148,7 +148,7 @@ static void inline blkif_get_x86_32_req(struct blkif_request *dst, dst->u.rw.seg[i] = src->seg[i]; } -static void inline blkif_get_x86_64_req(struct blkif_request *dst, +static inline void blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_x86_64_request *src) { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; -- cgit v1.2.3 From 452a6b2bb6de677acdd2ccb8b39cf6e8fe06f306 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:31:51 -0400 Subject: xen/blkback: Move include/xen/blkif.h into drivers/block/xen-blkback/common.h Not point of the blkif.h file. It is not used by the frontend. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 72 +++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 629546558a47..b8856fe2568f 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -38,15 +38,85 @@ #include #include #include -#include #include #include +#include +#include +#include #define DRV_PFX "xen-blkback:" #define DPRINTK(fmt, args...) \ pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ __func__, __LINE__, ##args) + +/* Not a real protocol. Used to generate ring structs which contain + * the elements common to all protocols only. This way we get a + * compiler-checkable way to use common struct elements, so we can + * avoid using switch(protocol) in a number of places. */ +struct blkif_common_request { + char dummy; +}; +struct blkif_common_response { + char dummy; +}; + +/* i386 protocol version */ +#pragma pack(push, 4) +struct blkif_x86_32_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_32_response { + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_x86_32_request blkif_x86_32_request_t; +typedef struct blkif_x86_32_response blkif_x86_32_response_t; +#pragma pack(pop) + +/* x86_64 protocol version */ +struct blkif_x86_64_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t __attribute__((__aligned__(8))) id; + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_64_response { + uint64_t __attribute__((__aligned__(8))) id; + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_x86_64_request blkif_x86_64_request_t; +typedef struct blkif_x86_64_response blkif_x86_64_response_t; + +DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, + struct blkif_common_response); +DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, + struct blkif_x86_32_response); +DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, + struct blkif_x86_64_response); + +union blkif_back_rings { + struct blkif_back_ring native; + struct blkif_common_back_ring common; + struct blkif_x86_32_back_ring x86_32; + struct blkif_x86_64_back_ring x86_64; +}; + +enum blkif_protocol { + BLKIF_PROTOCOL_NATIVE = 1, + BLKIF_PROTOCOL_X86_32 = 2, + BLKIF_PROTOCOL_X86_64 = 3, +}; + struct vbd { /* What the domain refers to this vbd as. */ blkif_vdev_t handle; -- cgit v1.2.3 From 325a64860472765ecaeaa0081e9ddd67671183d4 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:37:04 -0400 Subject: xen/blkback: Remove the unused typedefs. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index b8856fe2568f..b0db6aabb5b8 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -76,8 +76,6 @@ struct blkif_x86_32_response { uint8_t operation; /* copied from request */ int16_t status; /* BLKIF_RSP_??? */ }; -typedef struct blkif_x86_32_request blkif_x86_32_request_t; -typedef struct blkif_x86_32_response blkif_x86_32_response_t; #pragma pack(pop) /* x86_64 protocol version */ @@ -94,8 +92,6 @@ struct blkif_x86_64_response { uint8_t operation; /* copied from request */ int16_t status; /* BLKIF_RSP_??? */ }; -typedef struct blkif_x86_64_request blkif_x86_64_request_t; -typedef struct blkif_x86_64_response blkif_x86_64_response_t; DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response); -- cgit v1.2.3 From 30fd150202fb2d08a62f9c2966a4b1fcf2e861e7 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:47:48 -0400 Subject: xen/blkback: Change structure name blkif_st to xen_blkif. No need for that '_st' and xen_blkif is more apt. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 28 ++++++++++++++-------------- drivers/block/xen-blkback/common.h | 2 +- drivers/block/xen-blkback/xenbus.c | 24 ++++++++++++------------ 3 files changed, 27 insertions(+), 27 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index d06eb6a50d57..d438781ecc7c 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -71,7 +71,7 @@ module_param(log_stats, int, 0644); * response queued for it, with the saved 'id' passed back. */ struct pending_req { - struct blkif_st *blkif; + struct xen_blkif *blkif; u64 id; int nr_pages; atomic_t pendcnt; @@ -121,11 +121,11 @@ static inline unsigned long vaddr(struct pending_req *req, int seg) (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) -static int do_block_io_op(struct blkif_st *blkif); -static int dispatch_rw_block_io(struct blkif_st *blkif, +static int do_block_io_op(struct xen_blkif *blkif); +static int dispatch_rw_block_io(struct xen_blkif *blkif, struct blkif_request *req, struct pending_req *pending_req); -static void make_response(struct blkif_st *blkif, u64 id, +static void make_response(struct xen_blkif *blkif, u64 id, unsigned short op, int st); /* @@ -166,7 +166,7 @@ static void free_req(struct pending_req *req) /* * Routines for managing virtual block devices (vbds). */ -static int vbd_translate(struct phys_req *req, struct blkif_st *blkif, +static int vbd_translate(struct phys_req *req, struct xen_blkif *blkif, int operation) { struct vbd *vbd = &blkif->vbd; @@ -186,7 +186,7 @@ static int vbd_translate(struct phys_req *req, struct blkif_st *blkif, return rc; } -static void vbd_resize(struct blkif_st *blkif) +static void vbd_resize(struct xen_blkif *blkif) { struct vbd *vbd = &blkif->vbd; struct xenbus_transaction xbt; @@ -233,7 +233,7 @@ abort: /* * Notification from the guest OS. */ -static void blkif_notify_work(struct blkif_st *blkif) +static void blkif_notify_work(struct xen_blkif *blkif) { blkif->waiting_reqs = 1; wake_up(&blkif->wq); @@ -249,7 +249,7 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) * SCHEDULER FUNCTIONS */ -static void print_stats(struct blkif_st *blkif) +static void print_stats(struct xen_blkif *blkif) { pr_debug("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n", current->comm, blkif->st_oo_req, @@ -262,7 +262,7 @@ static void print_stats(struct blkif_st *blkif) int xen_blkif_schedule(void *arg) { - struct blkif_st *blkif = arg; + struct xen_blkif *blkif = arg; struct vbd *vbd = &blkif->vbd; xen_blkif_get(blkif); @@ -451,7 +451,7 @@ static void end_block_io_op(struct bio *bio, int error) * (which has the sectors we want, number of them, grant references, etc), * and transmute it to the block API to hand it over to the proper block disk. */ -static int do_block_io_op(struct blkif_st *blkif) +static int do_block_io_op(struct xen_blkif *blkif) { union blkif_back_rings *blk_rings = &blkif->blk_rings; struct blkif_request req; @@ -512,9 +512,9 @@ static int do_block_io_op(struct blkif_st *blkif) * Transmutation of the 'struct blkif_request' to a proper 'struct bio' * and call the 'submit_bio' to pass it to the underlying storage. */ -static int dispatch_rw_block_io(struct blkif_st *blkif, - struct blkif_request *req, - struct pending_req *pending_req) +static int dispatch_rw_block_io(struct xen_blkif *blkif, + struct blkif_request *req, + struct pending_req *pending_req) { struct phys_req preq; struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; @@ -692,7 +692,7 @@ static int dispatch_rw_block_io(struct blkif_st *blkif, /* * Put a response on the ring on how the operation fared. */ -static void make_response(struct blkif_st *blkif, u64 id, +static void make_response(struct xen_blkif *blkif, u64 id, unsigned short op, int st) { struct blkif_response resp; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index b0db6aabb5b8..722c048663d6 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -130,7 +130,7 @@ struct vbd { struct backend_info; -struct blkif_st { +struct xen_blkif { /* Unique identifier for this interface. */ domid_t domid; unsigned int handle; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index ba8d30662d19..f355f7a5d52d 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -23,7 +23,7 @@ struct backend_info { struct xenbus_device *dev; - struct blkif_st *blkif; + struct xen_blkif *blkif; struct xenbus_watch backend_watch; unsigned major; unsigned minor; @@ -41,7 +41,7 @@ struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be) return be->dev; } -static int blkback_name(struct blkif_st *blkif, char *buf) +static int blkback_name(struct xen_blkif *blkif, char *buf) { char *devpath, *devname; struct xenbus_device *dev = blkif->be->dev; @@ -62,7 +62,7 @@ static int blkback_name(struct blkif_st *blkif, char *buf) return 0; } -static void xen_update_blkif_status(struct blkif_st *blkif) +static void xen_update_blkif_status(struct xen_blkif *blkif) { int err; char name[TASK_COMM_LEN]; @@ -101,9 +101,9 @@ static void xen_update_blkif_status(struct blkif_st *blkif) } } -static struct blkif_st *xen_blkif_alloc(domid_t domid) +static struct xen_blkif *xen_blkif_alloc(domid_t domid) { - struct blkif_st *blkif; + struct xen_blkif *blkif; blkif = kmem_cache_alloc(xen_blkif_cachep, GFP_KERNEL); if (!blkif) @@ -120,7 +120,7 @@ static struct blkif_st *xen_blkif_alloc(domid_t domid) return blkif; } -static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) +static int map_frontend_page(struct xen_blkif *blkif, unsigned long shared_page) { struct gnttab_map_grant_ref op; @@ -141,7 +141,7 @@ static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) return 0; } -static void unmap_frontend_page(struct blkif_st *blkif) +static void unmap_frontend_page(struct xen_blkif *blkif) { struct gnttab_unmap_grant_ref op; @@ -152,7 +152,7 @@ static void unmap_frontend_page(struct blkif_st *blkif) BUG(); } -static int xen_blkif_map(struct blkif_st *blkif, unsigned long shared_page, +static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, unsigned int evtchn) { int err; @@ -211,7 +211,7 @@ static int xen_blkif_map(struct blkif_st *blkif, unsigned long shared_page, return 0; } -static void xen_blkif_disconnect(struct blkif_st *blkif) +static void xen_blkif_disconnect(struct xen_blkif *blkif) { if (blkif->xenblkd) { kthread_stop(blkif->xenblkd); @@ -234,7 +234,7 @@ static void xen_blkif_disconnect(struct blkif_st *blkif) } } -void xen_blkif_free(struct blkif_st *blkif) +void xen_blkif_free(struct xen_blkif *blkif) { if (!atomic_dec_and_test(&blkif->refcnt)) BUG(); @@ -244,7 +244,7 @@ void xen_blkif_free(struct blkif_st *blkif) int __init xen_blkif_interface_init(void) { xen_blkif_cachep = kmem_cache_create("blkif_cache", - sizeof(struct blkif_st), + sizeof(struct xen_blkif), 0, 0, NULL); if (!xen_blkif_cachep) return -ENOMEM; @@ -332,7 +332,7 @@ static void vbd_free(struct vbd *vbd) vbd->bdev = NULL; } -static int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, +static int vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, unsigned major, unsigned minor, int readonly, int cdrom) { -- cgit v1.2.3 From 3d814731ba67f9514bdf380c1b95dd852ac82a2f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:53:56 -0400 Subject: xen/blkback: Prefix 'vbd' with 'xen' in structs and functions. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 20 ++++++++++---------- drivers/block/xen-blkback/common.h | 4 ++-- drivers/block/xen-blkback/xenbus.c | 34 +++++++++++++++++----------------- 3 files changed, 29 insertions(+), 29 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index d438781ecc7c..1e454a30e4e1 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -166,10 +166,10 @@ static void free_req(struct pending_req *req) /* * Routines for managing virtual block devices (vbds). */ -static int vbd_translate(struct phys_req *req, struct xen_blkif *blkif, - int operation) +static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif, + int operation) { - struct vbd *vbd = &blkif->vbd; + struct xen_vbd *vbd = &blkif->vbd; int rc = -EACCES; if ((operation != READ) && vbd->readonly) @@ -186,9 +186,9 @@ static int vbd_translate(struct phys_req *req, struct xen_blkif *blkif, return rc; } -static void vbd_resize(struct xen_blkif *blkif) +static void xen_vbd_resize(struct xen_blkif *blkif) { - struct vbd *vbd = &blkif->vbd; + struct xen_vbd *vbd = &blkif->vbd; struct xenbus_transaction xbt; int err; struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be); @@ -263,7 +263,7 @@ static void print_stats(struct xen_blkif *blkif) int xen_blkif_schedule(void *arg) { struct xen_blkif *blkif = arg; - struct vbd *vbd = &blkif->vbd; + struct xen_vbd *vbd = &blkif->vbd; xen_blkif_get(blkif); @@ -271,7 +271,7 @@ int xen_blkif_schedule(void *arg) if (try_to_freeze()) continue; if (unlikely(vbd->size != vbd_sz(vbd))) - vbd_resize(blkif); + xen_vbd_resize(blkif); wait_event_interruptible( blkif->wq, @@ -538,7 +538,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, blkif->st_f_req++; operation = WRITE_FLUSH; /* - * The frontend likes to set this to -1, which vbd_translate + * The frontend likes to set this to -1, which xen_vbd_translate * is alergic too. */ req->u.rw.sector_number = 0; @@ -580,7 +580,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, } - if (vbd_translate(&preq, blkif, operation) != 0) { + if (xen_vbd_translate(&preq, blkif, operation) != 0) { pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n", operation == READ ? "read" : "write", preq.sector_number, @@ -589,7 +589,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, } /* - * This check _MUST_ be done after vbd_translate as the preq.bdev + * This check _MUST_ be done after xen_vbd_translate as the preq.bdev * is set there. */ for (i = 0; i < nseg; i++) { diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 722c048663d6..1e4ccdeadef3 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -113,7 +113,7 @@ enum blkif_protocol { BLKIF_PROTOCOL_X86_64 = 3, }; -struct vbd { +struct xen_vbd { /* What the domain refers to this vbd as. */ blkif_vdev_t handle; /* Non-zero -> read-only */ @@ -141,7 +141,7 @@ struct xen_blkif { union blkif_back_rings blk_rings; struct vm_struct *blk_ring_area; /* The VBD attached to this interface. */ - struct vbd vbd; + struct xen_vbd vbd; /* Back pointer to the backend_info. */ struct backend_info *be; /* Private fields. */ diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index f355f7a5d52d..e470d8869053 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -275,7 +275,7 @@ VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req); VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); -static struct attribute *vbdstat_attrs[] = { +static struct attribute *xen_vbdstat_attrs[] = { &dev_attr_oo_req.attr, &dev_attr_rd_req.attr, &dev_attr_wr_req.attr, @@ -285,9 +285,9 @@ static struct attribute *vbdstat_attrs[] = { NULL }; -static struct attribute_group vbdstat_group = { +static struct attribute_group xen_vbdstat_group = { .name = "statistics", - .attrs = vbdstat_attrs, + .attrs = xen_vbdstat_attrs, }; VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); @@ -305,13 +305,13 @@ int xenvbd_sysfs_addif(struct xenbus_device *dev) if (error) goto fail2; - error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group); + error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group); if (error) goto fail3; return 0; -fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); +fail3: sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group); fail2: device_remove_file(&dev->dev, &dev_attr_mode); fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); return error; @@ -319,24 +319,24 @@ fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); void xenvbd_sysfs_delif(struct xenbus_device *dev) { - sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); + sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group); device_remove_file(&dev->dev, &dev_attr_mode); device_remove_file(&dev->dev, &dev_attr_physical_device); } -static void vbd_free(struct vbd *vbd) +static void xen_vbd_free(struct xen_vbd *vbd) { if (vbd->bdev) blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); vbd->bdev = NULL; } -static int vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, - unsigned major, unsigned minor, int readonly, - int cdrom) +static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, + unsigned major, unsigned minor, int readonly, + int cdrom) { - struct vbd *vbd; + struct xen_vbd *vbd; struct block_device *bdev; struct request_queue *q; @@ -351,7 +351,7 @@ static int vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, FMODE_READ : FMODE_WRITE, NULL); if (IS_ERR(bdev)) { - DPRINTK("vbd_create: device %08x could not be opened.\n", + DPRINTK("xen_vbd_create: device %08x could not be opened.\n", vbd->pdevice); return -ENOENT; } @@ -360,9 +360,9 @@ static int vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, vbd->size = vbd_sz(vbd); if (vbd->bdev->bd_disk == NULL) { - DPRINTK("vbd_create: device %08x doesn't exist.\n", + DPRINTK("xen_vbd_create: device %08x doesn't exist.\n", vbd->pdevice); - vbd_free(vbd); + xen_vbd_free(vbd); return -ENOENT; } @@ -396,7 +396,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev) if (be->blkif) { xen_blkif_disconnect(be->blkif); - vbd_free(&be->blkif->vbd); + xen_vbd_free(&be->blkif->vbd); xen_blkif_free(be->blkif); be->blkif = NULL; } @@ -535,7 +535,7 @@ static void backend_changed(struct xenbus_watch *watch, be->major = major; be->minor = minor; - err = vbd_create(be->blkif, handle, major, minor, + err = xen_vbd_create(be->blkif, handle, major, minor, (NULL == strchr(be->mode, 'w')), cdrom); if (err) { be->major = 0; @@ -546,7 +546,7 @@ static void backend_changed(struct xenbus_watch *watch, err = xenvbd_sysfs_addif(dev); if (err) { - vbd_free(&be->blkif->vbd); + xen_vbd_free(&be->blkif->vbd); be->major = 0; be->minor = 0; xenbus_dev_fatal(dev, err, "creating sysfs entries"); -- cgit v1.2.3 From 5a577e38724226e06337bc8361f492b6bb76b9a5 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 16:58:21 -0400 Subject: xen/blkback: Add the prefix XEN in the common.h. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 1e4ccdeadef3..9e40b283a468 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -24,8 +24,8 @@ * IN THE SOFTWARE. */ -#ifndef __BLKIF__BACKEND__COMMON_H__ -#define __BLKIF__BACKEND__COMMON_H__ +#ifndef __XEN_BLKIF__BACKEND__COMMON_H__ +#define __XEN_BLKIF__BACKEND__COMMON_H__ #include #include @@ -230,4 +230,4 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, dst->u.rw.seg[i] = src->seg[i]; } -#endif /* __BLKIF__BACKEND__COMMON_H__ */ +#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */ -- cgit v1.2.3 From cca537af7d6defe8001c2229da738f8a3c934fc0 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 17:23:30 -0400 Subject: xen/blkback: if log_stats is enabled print out the data. And not depend on the driver being built with -DDEBUG flag. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 1e454a30e4e1..9dee5454740f 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -251,7 +251,7 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct xen_blkif *blkif) { - pr_debug("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n", + pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n", current->comm, blkif->st_oo_req, blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req); blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); -- cgit v1.2.3 From 5185432277ddf5bd91ad5af29cd1945f25ed10fc Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 12 May 2011 18:02:28 -0400 Subject: xen/blkback: Align the tabs on the structure. The recent changes caused this field of the structure to be offset a bit. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index e470d8869053..34570823355b 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -23,7 +23,7 @@ struct backend_info { struct xenbus_device *dev; - struct xen_blkif *blkif; + struct xen_blkif *blkif; struct xenbus_watch backend_watch; unsigned major; unsigned minor; -- cgit v1.2.3 From 496b318eb65558c1a3a4fe882cb9da6d1dc6493a Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Fri, 13 May 2011 09:45:40 -0400 Subject: xen/blkback: fix xenbus_transaction_start() hang caused by double xenbus_transaction_end() vbd_resize() up_read()'s xs_state.suspend_mutex twice in a row via double xenbus_transaction_end() calls. The next down_read() in xenbus_transaction_start() (at eg. the next resize attempt) hangs. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=618317 Acked-by: Jan Beulich Acked-by: Ian Campbell Signed-off-by: Laszlo Ersek Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 9dee5454740f..dba55e3a4a86 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -226,6 +226,7 @@ again: goto again; if (err) pr_warn(DRV_PFX "Error ending transaction"); + return; abort: xenbus_transaction_end(xbt, 1); } -- cgit v1.2.3 From 8ab521506c4dbb144f0c04c55e3d8bec42c1b2b9 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 17 May 2011 11:07:05 +0100 Subject: xen/blkback: don't fail empty barrier requests The sector number on empty barrier requests may (will?) be -1, which, given that it's being treated as unsigned 64-bit quantity, will almost always exceed the actual (virtual) disk's size. Inspired by Konrad's "When writting barriers set the sector number to zero...". While at it also add overflow checking to the math in vbd_translate(). Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index dba55e3a4a86..c73910cc28c9 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -175,8 +175,14 @@ static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif, if ((operation != READ) && vbd->readonly) goto out; - if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) - goto out; + if (likely(req->nr_sects)) { + blkif_sector_t end = req->sector_number + req->nr_sects; + + if (unlikely(end < req->sector_number)) + goto out; + if (unlikely(end > vbd_sz(vbd))) + goto out; + } req->dev = vbd->pdevice; req->bdev = vbd->bdev; @@ -538,11 +544,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, case BLKIF_OP_FLUSH_DISKCACHE: blkif->st_f_req++; operation = WRITE_FLUSH; - /* - * The frontend likes to set this to -1, which xen_vbd_translate - * is alergic too. - */ - req->u.rw.sector_number = 0; break; case BLKIF_OP_WRITE_BARRIER: default: -- cgit v1.2.3 From 738a84b25cac5af94936e5a1b15cd9909212383c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 3 Mar 2011 00:21:30 +0100 Subject: drbd: Fix for application IO with the on-io-error=pass-on policy In case a write failes on the local disk, go into D_INCONSISTENT disk state. That causes future reads of that block to be shipped to the peer. Read retry remote was already in place. Actually the documentation needs to get fixed now. Since the application is still shielded from the error. (as long as we have only a single disk failing) The difference to detach is that we keep the disk. And therefore might keep all the other, still working sectors up to date. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b2699bb2e530..2c38752ca8d6 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1827,6 +1827,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, if (!forcedetach) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Local IO failed in %s.\n", where); + _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL); break; } /* NOTE fall through to detach case if forcedetach set */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5b525c179f39..fd308864833f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1565,6 +1565,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, put_ldev(mdev); } + /* Notify peer that I had a local IO error, and did not detached.. */ + if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) + drbd_send_state(mdev); + /* Disks got bigger while they were detached */ if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) { -- cgit v1.2.3 From 53ea433145d9a56c7ad5e69f21f5662053e00e84 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 8 Mar 2011 17:11:40 +0100 Subject: drbd: fix potential distributed deadlock We limit ourselves to a configurable maximum number of pages used as temporary bio pages. If the configured "max_buffers" is not big enough to match the bandwidth of the respective deployment, a distributed deadlock could be triggered by e.g. fast online verify and heavy application IO. TCP connections would block on congestion, because both receivers would wait on pages to become available. Fortunately the respective senders in this case would be able to give back some pages already. So do that. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 94 +++++++++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 35 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index f7e6c92f8d03..b5e53695fd7e 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -297,42 +297,48 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * crypto_hash_final(&desc, digest); } -static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +/* TODO merge common code with w_e_end_ov_req */ +int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); int digest_size; void *digest; - int ok; + int ok = 1; D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef); - if (unlikely(cancel)) { - drbd_free_ee(mdev, e); - return 1; - } + if (unlikely(cancel)) + goto out; - if (likely((e->flags & EE_WAS_ERROR) == 0)) { - digest_size = crypto_hash_digestsize(mdev->csums_tfm); - digest = kmalloc(digest_size, GFP_NOIO); - if (digest) { - drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); + if (likely((e->flags & EE_WAS_ERROR) != 0)) + goto out; - inc_rs_pending(mdev); - ok = drbd_send_drequest_csum(mdev, - e->sector, - e->size, - digest, - digest_size, - P_CSUM_RS_REQUEST); - kfree(digest); - } else { - dev_err(DEV, "kmalloc() of digest failed.\n"); - ok = 0; - } - } else - ok = 1; + digest_size = crypto_hash_digestsize(mdev->csums_tfm); + digest = kmalloc(digest_size, GFP_NOIO); + if (digest) { + sector_t sector = e->sector; + unsigned int size = e->size; + drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); + /* Free e and pages before send. + * In case we block on congestion, we could otherwise run into + * some distributed deadlock, if the other side blocks on + * congestion as well, because our receiver blocks in + * drbd_pp_alloc due to pp_in_use > max_buffers. */ + drbd_free_ee(mdev, e); + e = NULL; + inc_rs_pending(mdev); + ok = drbd_send_drequest_csum(mdev, sector, size, + digest, digest_size, + P_CSUM_RS_REQUEST); + kfree(digest); + } else { + dev_err(DEV, "kmalloc() of digest failed.\n"); + ok = 0; + } - drbd_free_ee(mdev, e); +out: + if (e) + drbd_free_ee(mdev, e); if (unlikely(!ok)) dev_err(DEV, "drbd_send_drequest(..., csum) failed\n"); @@ -1071,9 +1077,12 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return ok; } +/* TODO merge common code with w_e_send_csum */ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + sector_t sector = e->sector; + unsigned int size = e->size; int digest_size; void *digest; int ok = 1; @@ -1093,17 +1102,25 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) else memset(digest, 0, digest_size); + /* Free e and pages before send. + * In case we block on congestion, we could otherwise run into + * some distributed deadlock, if the other side blocks on + * congestion as well, because our receiver blocks in + * drbd_pp_alloc due to pp_in_use > max_buffers. */ + drbd_free_ee(mdev, e); + e = NULL; inc_rs_pending(mdev); - ok = drbd_send_drequest_csum(mdev, e->sector, e->size, - digest, digest_size, P_OV_REPLY); + ok = drbd_send_drequest_csum(mdev, sector, size, + digest, digest_size, + P_OV_REPLY); if (!ok) dec_rs_pending(mdev); kfree(digest); out: - drbd_free_ee(mdev, e); + if (e) + drbd_free_ee(mdev, e); dec_unacked(mdev); - return ok; } @@ -1122,8 +1139,10 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); struct digest_info *di; - int digest_size; void *digest; + sector_t sector = e->sector; + unsigned int size = e->size; + int digest_size; int ok, eq = 0; if (unlikely(cancel)) { @@ -1153,16 +1172,21 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } } - dec_unacked(mdev); + /* Free e and pages before send. + * In case we block on congestion, we could otherwise run into + * some distributed deadlock, if the other side blocks on + * congestion as well, because our receiver blocks in + * drbd_pp_alloc due to pp_in_use > max_buffers. */ + drbd_free_ee(mdev, e); if (!eq) - drbd_ov_oos_found(mdev, e->sector, e->size); + drbd_ov_oos_found(mdev, sector, size); else ov_oos_print(mdev); - ok = drbd_send_ack_ex(mdev, P_OV_RESULT, e->sector, e->size, + ok = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); - drbd_free_ee(mdev, e); + dec_unacked(mdev); --mdev->ov_left; -- cgit v1.2.3 From f36af18c7b4ea1ba333c09b606bb4a7e5af66b4d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 9 Mar 2011 22:44:55 +0100 Subject: drbd: fix disconnect/reconnect loop, if ping-timeout == ping-int If there is no replication traffic within the idle timeout (ping-int seconds), DRBD will send a P_PING, and adjust the timeout to ping-timeout. If there is no P_PING_ACK received within this ping-timeout, DRBD finally drops the connection, and tries to re-establish it. To decide which timeout was active, we compared the current timeout with the ping-timeout, and dropped the connection, if that was the case. By default, ping-int is 10 seconds, ping-timeout is 500 ms. Unfortunately, if you configure ping-timeout to be the same as ping-int, expiry of the idle-timeout had been mistaken for a missing ping ack, and caused an immediate reconnection attempt. Fix: Allow both timeouts to be equal, use a local variable to store which timeout is active. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fd26666c0b08..0b17d426c32b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4554,6 +4554,7 @@ int drbd_asender(struct drbd_thread *thi) int received = 0; int expect = sizeof(struct p_header80); int empty; + int ping_timeout_active = 0; sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); @@ -4566,6 +4567,7 @@ int drbd_asender(struct drbd_thread *thi) ERR_IF(!drbd_send_ping(mdev)) goto reconnect; mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_timeo*HZ/10; + ping_timeout_active = 1; } /* conditionally cork; @@ -4620,8 +4622,7 @@ int drbd_asender(struct drbd_thread *thi) dev_err(DEV, "meta connection shut down by peer.\n"); goto reconnect; } else if (rv == -EAGAIN) { - if (mdev->meta.socket->sk->sk_rcvtimeo == - mdev->net_conf->ping_timeo*HZ/10) { + if (ping_timeout_active) { dev_err(DEV, "PingAck did not arrive in time.\n"); goto reconnect; } @@ -4660,6 +4661,11 @@ int drbd_asender(struct drbd_thread *thi) if (!cmd->process(mdev, h)) goto reconnect; + /* the idle_timeout (ping-int) + * has been restored in got_PingAck() */ + if (cmd == get_asender_cmd(P_PING_ACK)) + ping_timeout_active = 0; + buf = h; received = 0; expect = sizeof(struct p_header80); -- cgit v1.2.3 From d2e17807e3799bae24664a92f4d2d3dade021e00 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 14 Mar 2011 11:54:47 +0100 Subject: drbd: Only downgrade the disk state in case of disk failures Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2c38752ca8d6..5c994739d11e 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1827,7 +1827,8 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, if (!forcedetach) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Local IO failed in %s.\n", where); - _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL); + if (mdev->state.disk > D_INCONSISTENT) + _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL); break; } /* NOTE fall through to detach case if forcedetach set */ -- cgit v1.2.3 From 76727f684aa2d6a2dc59a7e5cf77e092a1bf4fb6 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 16 May 2011 15:31:45 +0200 Subject: drbd: fix potential activity log refcount imbalance in error path It is no longer sufficient to trigger on local WRITE, we need to check on (rq_state & RQ_IN_ACT_LOG) before calling drbd_al_complete_io also in the error path. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 5c0c8be1bb0a..14645bd40092 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1033,7 +1033,7 @@ fail_conflicting: err = 0; fail_free_complete: - if (rw == WRITE && local) + if (req->rq_state & RQ_IN_ACT_LOG) drbd_al_complete_io(mdev, sector); fail_and_free_req: if (local) { -- cgit v1.2.3 From a8e407925d49c521151dd24b6376c1f9a04a093f Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 13 May 2011 12:03:55 +0200 Subject: drbd: Fix for the connection problems on high latency links It seems that the real cause of all the issues where that we did not noticed in drbd_try_connect() when the other guy closes one socket if the round trip time gets higher than 100ms. There were that 100ms hard coded! Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0b17d426c32b..b0b0ba345e83 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -787,7 +787,7 @@ static int drbd_connect(struct drbd_conf *mdev) } if (sock && msock) { - schedule_timeout_interruptible(HZ / 10); + schedule_timeout_interruptible(mdev->net_conf->ping_timeo*HZ/10); ok = drbd_socket_okay(mdev, &sock); ok = drbd_socket_okay(mdev, &msock) && ok; if (ok) -- cgit v1.2.3 From fa7d939663b61f5c2bd3436d3aa126d4c0f47aa8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 17 May 2011 14:48:55 +0200 Subject: drbd: Disallow the peer_disk_state to be D_OUTDATED while connected Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index fd308864833f..ce6a764e905b 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -745,6 +745,9 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) mdev->agreed_pro_version < 88) rv = SS_NOT_SUPPORTED; + else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) + rv = SS_CONNECTED_OUTDATES; + return rv; } -- cgit v1.2.3 From 21423fa79119a80e335de0c82ec29f67ed59f1bc Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 17 May 2011 14:19:41 +0200 Subject: drbd: Fixed state transitions after async outdate-peer-handler returned Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 03b29f78a37d..9dfe58a09625 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -272,9 +272,22 @@ static int _try_outdate_peer_async(void *data) { struct drbd_conf *mdev = (struct drbd_conf *)data; enum drbd_disk_state nps; + union drbd_state ns; nps = drbd_try_outdate_peer(mdev); - drbd_request_state(mdev, NS(pdsk, nps)); + + /* Not using + drbd_request_state(mdev, NS(pdsk, nps)); + here, because we might were able to re-establish the connection in the + meantime. + */ + spin_lock_irq(&mdev->req_lock); + ns = mdev->state; + if (ns.conn < C_WF_REPORT_PARAMS) { + ns.pdsk = nps; + _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); + } + spin_unlock_irq(&mdev->req_lock); return 0; } -- cgit v1.2.3 From 99432fcc528d7a5ac8494a4c07ad4726670c96e2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 20 May 2011 16:39:13 +0200 Subject: drbd: Take a more conservative approach when deciding max_bio_size The old (optimistic) implementation could shrink the bio size on an primary device. Shrinking the bio size on a primary device is bad. Since there we might get BIOs with the old (bigger) size shortly after we published the new size. The new implementation is more conservative, and eventually increases the max_bio_size on a primary device (which is valid). It does so, when it knows the local limit AND the remote limit. We cache the last seen max_bio_size of the peer in the meta data, and rely on that, to make the operation of single nodes more efficient. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 5 +- drivers/block/drbd/drbd_main.c | 26 +++++++++-- drivers/block/drbd/drbd_nl.c | 96 +++++++++++++++++++++++++++----------- drivers/block/drbd/drbd_receiver.c | 20 ++------ 4 files changed, 97 insertions(+), 50 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 5c994739d11e..8aa10391115b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1128,6 +1128,8 @@ struct drbd_conf { int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ int rs_planed; /* resync sectors already planned */ atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */ + int peer_max_bio_size; + int local_max_bio_size; }; static inline struct drbd_conf *minor_to_mdev(unsigned int minor) @@ -1433,6 +1435,7 @@ struct bm_extent { * hash table. */ #define HT_SHIFT 8 #define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT)) +#define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */ #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */ @@ -1519,7 +1522,7 @@ extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); extern void resync_after_online_grow(struct drbd_conf *); -extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); +extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev); extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index ce6a764e905b..cfeb13b5a216 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2071,7 +2071,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl { struct p_sizes p; sector_t d_size, u_size; - int q_order_type; + int q_order_type, max_bio_size; int ok; if (get_ldev_if_state(mdev, D_NEGOTIATING)) { @@ -2079,17 +2079,20 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl d_size = drbd_get_max_capacity(mdev->ldev); u_size = mdev->ldev->dc.disk_size; q_order_type = drbd_queue_order_type(mdev); + max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9; + max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE); put_ldev(mdev); } else { d_size = 0; u_size = 0; q_order_type = QUEUE_ORDERED_NONE; + max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */ } p.d_size = cpu_to_be64(d_size); p.u_size = cpu_to_be64(u_size); p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); - p.max_bio_size = cpu_to_be32(queue_max_hw_sectors(mdev->rq_queue) << 9); + p.max_bio_size = cpu_to_be32(max_bio_size); p.queue_order_type = cpu_to_be16(q_order_type); p.dds_flags = cpu_to_be16(flags); @@ -3048,6 +3051,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) mdev->agreed_pro_version = PRO_VERSION_MAX; mdev->write_ordering = WO_bdev_flush; mdev->resync_wenr = LC_FREE; + mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; + mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; } void drbd_mdev_cleanup(struct drbd_conf *mdev) @@ -3422,7 +3427,9 @@ struct drbd_conf *drbd_new_device(unsigned int minor) q->backing_dev_info.congested_data = mdev; blk_queue_make_request(q, drbd_make_request); - blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE >> 9); + /* Setting the max_hw_sectors to an odd value of 8kibyte here + This triggers a max_bio_size message upon first attach or connect */ + blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); blk_queue_merge_bvec(q, drbd_merge_bvec); q->queue_lock = &mdev->req_lock; @@ -3634,7 +3641,8 @@ struct meta_data_on_disk { /* `-- act_log->nr_elements <-- sync_conf.al_extents */ u32 bm_offset; /* offset to the bitmap, from here */ u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ - u32 reserved_u32[4]; + u32 la_peer_max_bio_size; /* last peer max_bio_size */ + u32 reserved_u32[3]; } __packed; @@ -3675,6 +3683,7 @@ void drbd_md_sync(struct drbd_conf *mdev) buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid); buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset); + buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size); D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset); sector = mdev->ldev->md.md_offset; @@ -3758,6 +3767,15 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents); bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); + spin_lock_irq(&mdev->req_lock); + if (mdev->state.conn < C_CONNECTED) { + int peer; + peer = be32_to_cpu(buffer->la_peer_max_bio_size); + peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE); + mdev->peer_max_bio_size = peer; + } + spin_unlock_irq(&mdev->req_lock); + if (mdev->sync_conf.al_extents < 7) mdev->sync_conf.al_extents = 127; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9dfe58a09625..7c64ec042124 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -278,8 +278,14 @@ static int _try_outdate_peer_async(void *data) /* Not using drbd_request_state(mdev, NS(pdsk, nps)); - here, because we might were able to re-establish the connection in the - meantime. + here, because we might were able to re-establish the connection + in the meantime. This can only partially be solved in the state's + engine is_valid_state() and is_valid_state_transition() + functions. + + nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN. + pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid, + therefore we have to have the pre state change check here. */ spin_lock_irq(&mdev->req_lock); ns = mdev->state; @@ -786,30 +792,78 @@ static int drbd_check_al_size(struct drbd_conf *mdev) return 0; } -void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) __must_hold(local) +static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) { struct request_queue * const q = mdev->rq_queue; - struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; - int max_segments = mdev->ldev->dc.max_bio_bvecs; - int max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); + int max_hw_sectors = max_bio_size >> 9; + int max_segments = 0; + + if (get_ldev_if_state(mdev, D_ATTACHING)) { + struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; + + max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); + max_segments = mdev->ldev->dc.max_bio_bvecs; + put_ldev(mdev); + } blk_queue_logical_block_size(q, 512); blk_queue_max_hw_sectors(q, max_hw_sectors); /* This is the workaround for "bio would need to, but cannot, be split" */ blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1); - blk_queue_stack_limits(q, b); - dev_info(DEV, "max BIO size = %u\n", queue_max_hw_sectors(q) << 9); + if (get_ldev_if_state(mdev, D_ATTACHING)) { + struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; - if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { - dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", - q->backing_dev_info.ra_pages, - b->backing_dev_info.ra_pages); - q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; + blk_queue_stack_limits(q, b); + + if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { + dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", + q->backing_dev_info.ra_pages, + b->backing_dev_info.ra_pages); + q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; + } + put_ldev(mdev); } } +void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) +{ + int now, new, local, peer; + + now = queue_max_hw_sectors(mdev->rq_queue) << 9; + local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */ + peer = mdev->peer_max_bio_size; /* Eventually last known value, from meta data */ + + if (get_ldev_if_state(mdev, D_ATTACHING)) { + local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9; + mdev->local_max_bio_size = local; + put_ldev(mdev); + } + + /* We may ignore peer limits if the peer is modern enough. + Because new from 8.3.8 onwards the peer can use multiple + BIOs for a single peer_request */ + if (mdev->state.conn >= C_CONNECTED) { + if (mdev->agreed_pro_version < 94) + peer = mdev->peer_max_bio_size; + else if (mdev->agreed_pro_version == 94) + peer = DRBD_MAX_SIZE_H80_PACKET; + else /* drbd 8.3.8 onwards */ + peer = DRBD_MAX_BIO_SIZE; + } + + new = min_t(int, local, peer); + + if (mdev->state.role == R_PRIMARY && new < now) + dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now); + + if (new != now) + dev_info(DEV, "max BIO size = %u\n", new); + + drbd_setup_queue_param(mdev, new); +} + /* serialize deconfig (worker exiting, doing cleanup) * and reconfig (drbdsetup disk, drbdsetup net) * @@ -878,7 +932,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp struct block_device *bdev; struct lru_cache *resync_lru = NULL; union drbd_state ns, os; - unsigned int max_bio_size; enum drbd_state_rv rv; int cp_discovered = 0; int logical_block_size; @@ -1130,20 +1183,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp mdev->read_cnt = 0; mdev->writ_cnt = 0; - max_bio_size = DRBD_MAX_BIO_SIZE; - if (mdev->state.conn == C_CONNECTED) { - /* We are Primary, Connected, and now attach a new local - * backing store. We must not increase the user visible maximum - * bio size on this device to something the peer may not be - * able to handle. */ - if (mdev->agreed_pro_version < 94) - max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9; - else if (mdev->agreed_pro_version == 94) - max_bio_size = DRBD_MAX_SIZE_H80_PACKET; - /* else: drbd 8.3.9 and later, stay with default */ - } - - drbd_setup_queue_param(mdev, max_bio_size); + drbd_reconsider_max_bio_size(mdev); /* If I am currently not R_PRIMARY, * but meta data primary indicator is set, diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b0b0ba345e83..6ea0a4b51ece 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -899,11 +899,6 @@ retry: drbd_thread_start(&mdev->asender); - if (mdev->agreed_pro_version < 95 && get_ldev(mdev)) { - drbd_setup_queue_param(mdev, DRBD_MAX_SIZE_H80_PACKET); - put_ldev(mdev); - } - if (drbd_send_protocol(mdev) == -1) return -1; drbd_send_sync_param(mdev, &mdev->sync_conf); @@ -2939,7 +2934,6 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned { struct p_sizes *p = &mdev->data.rbuf.sizes; enum determine_dev_size dd = unchanged; - unsigned int max_bio_size; sector_t p_size, p_usize, my_usize; int ldsc = 0; /* local disk size changed */ enum dds_flags ddsf; @@ -3004,23 +2998,15 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned drbd_set_my_capacity(mdev, p_size); } + mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size); + drbd_reconsider_max_bio_size(mdev); + if (get_ldev(mdev)) { if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) { mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); ldsc = 1; } - if (mdev->agreed_pro_version < 94) - max_bio_size = be32_to_cpu(p->max_bio_size); - else if (mdev->agreed_pro_version == 94) - max_bio_size = DRBD_MAX_SIZE_H80_PACKET; - else /* drbd 8.3.8 onwards */ - max_bio_size = DRBD_MAX_BIO_SIZE; - - if (max_bio_size != queue_max_hw_sectors(mdev->rq_queue) << 9) - drbd_setup_queue_param(mdev, max_bio_size); - - drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type)); put_ldev(mdev); } -- cgit v1.2.3 From 9a0d9d0389ef769e4b01abf50fcc11407706270b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 2 May 2011 11:51:31 +0200 Subject: drbd: fix schedule in atomic An administrative detach used to request a state change directly to D_DISKLESS, first suspending IO to avoid the last put_ldev() occuring from an endio handler, potentially in irq context. This is not enough on the receiving side (typically secondary), we may miss some peer_req on the way to local disk, which then may do the last put_ldev() from their drbd_peer_request_endio(). This patch makes the detach always go through the intermediate D_FAILED state. We may consider to rename it D_DETACHING. Alternative approach would be to create yet an other work item to be scheduled on the worker, do the destructor work from there, and get the timing right. manually picked commit 564040f from the drbd 8.4 branch. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++++ drivers/block/drbd/drbd_nl.c | 14 +++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8aa10391115b..a74d3ee04ba8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2157,6 +2157,10 @@ static inline int get_net_conf(struct drbd_conf *mdev) static inline void put_ldev(struct drbd_conf *mdev) { int i = atomic_dec_return(&mdev->local_cnt); + + /* This may be called from some endio handler, + * so we must not sleep here. */ + __release(local); D_ASSERT(i >= 0); if (i == 0) { diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 7c64ec042124..13569635b922 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1334,11 +1334,19 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { + enum drbd_ret_code retcode; + int ret; drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ - reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS)); - if (mdev->state.disk == D_DISKLESS) - wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); + retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); + /* D_FAILED will transition to DISKLESS. */ + ret = wait_event_interruptible(mdev->misc_wait, + mdev->state.disk != D_FAILED); drbd_resume_io(mdev); + if (retcode == SS_IS_DISKLESS) + retcode = SS_NOTHING_TO_DO; + if (ret) + retcode = ERR_INTR; + reply->ret_code = retcode; return 0; } -- cgit v1.2.3 From 24c4830c8ec3cbc904d84c213126a35f41a4e455 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 21 May 2011 18:32:29 +0200 Subject: drbd: Fix spelling Found these with the help of ispell -l. Signed-off-by: Bart Van Assche Signed-off-by: Lars Ellenberg Signed-off-by: Philipp Reisner --- drivers/block/drbd/drbd_actlog.c | 2 +- drivers/block/drbd/drbd_bitmap.c | 6 +++--- drivers/block/drbd/drbd_int.h | 6 +++--- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_nl.c | 6 +++--- drivers/block/drbd/drbd_receiver.c | 30 +++++++++++++++--------------- drivers/block/drbd/drbd_req.c | 18 +++++++++--------- drivers/block/drbd/drbd_req.h | 4 ++-- drivers/block/drbd/drbd_worker.c | 4 ++-- 9 files changed, 40 insertions(+), 40 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index c6828b68d77b..09ef9a878ef0 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -28,7 +28,7 @@ #include "drbd_int.h" #include "drbd_wrappers.h" -/* We maintain a trivial check sum in our on disk activity log. +/* We maintain a trivial checksum in our on disk activity log. * With that we can ensure correct operation even when the storage * device might do a partial (last) sector write while losing power. */ diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 76210ba401ac..f440a02dfdb1 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -74,7 +74,7 @@ * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage * seems excessive. * - * We plan to reduce the amount of in-core bitmap pages by pageing them in + * We plan to reduce the amount of in-core bitmap pages by paging them in * and out against their on-disk location as necessary, but need to make * sure we don't cause too much meta data IO, and must not deadlock in * tight memory situations. This needs some more work. @@ -200,7 +200,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev) * we if bits have been cleared since last IO. */ #define BM_PAGE_LAZY_WRITEOUT 28 -/* store_page_idx uses non-atomic assingment. It is only used directly after +/* store_page_idx uses non-atomic assignment. It is only used directly after * allocating the page. All other bm_set_page_* and bm_clear_page_* need to * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap * changes) may happen from various contexts, and wait_on_bit/wake_up_bit @@ -318,7 +318,7 @@ static void bm_unmap(unsigned long *p_addr) /* word offset from start of bitmap to word number _in_page_ * modulo longs per page #define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long)) - hm, well, Philipp thinks gcc might not optimze the % into & (... - 1) + hm, well, Philipp thinks gcc might not optimize the % into & (... - 1) so do it explicitly: */ #define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1)) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a74d3ee04ba8..7952eb90d17f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -699,7 +699,7 @@ struct drbd_request { * see drbd_endio_pri(). */ struct bio *private_bio; - struct hlist_node colision; + struct hlist_node collision; sector_t sector; unsigned int size; unsigned int epoch; /* barrier_nr */ @@ -765,7 +765,7 @@ struct digest_info { struct drbd_epoch_entry { struct drbd_work w; - struct hlist_node colision; + struct hlist_node collision; struct drbd_epoch *epoch; /* for writes */ struct drbd_conf *mdev; struct page *pages; @@ -1520,7 +1520,7 @@ extern void drbd_resume_io(struct drbd_conf *mdev); extern char *ppsize(char *buf, unsigned long long size); extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int); enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; -extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); +extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); extern void resync_after_online_grow(struct drbd_conf *); extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev); extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index cfeb13b5a216..0358e55356c8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2732,7 +2732,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) /* double check digest, sometimes buffers have been modified in flight. */ if (dgs > 0 && dgs <= 64) { - /* 64 byte, 512 bit, is the larges digest size + /* 64 byte, 512 bit, is the largest digest size * currently supported in kernel crypto. */ unsigned char digest[64]; drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest); @@ -3287,7 +3287,7 @@ static void drbd_delete_device(unsigned int minor) drbd_release_ee_lists(mdev); - /* should be free'd on disconnect? */ + /* should be freed on disconnect? */ kfree(mdev->ee_hash); /* mdev->ee_hash_s = 0; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 13569635b922..259ca0b20961 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -596,7 +596,7 @@ void drbd_resume_io(struct drbd_conf *mdev) * Returns 0 on success, negative return values indicate errors. * You should call drbd_md_sync() after calling this function. */ -enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) +enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) { sector_t prev_first_sect, prev_size; /* previous meta location */ sector_t la_size; @@ -1205,7 +1205,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) set_bit(USE_DEGR_WFC_T, &mdev->flags); - dd = drbd_determin_dev_size(mdev, 0); + dd = drbd_determine_dev_size(mdev, 0); if (dd == dev_size_error) { retcode = ERR_NOMEM_BITMAP; goto force_diskless_dec; @@ -1719,7 +1719,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->ldev->dc.disk_size = (sector_t)rs.resize_size; ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); - dd = drbd_determin_dev_size(mdev, ddsf); + dd = drbd_determine_dev_size(mdev, ddsf); drbd_md_sync(mdev); put_ldev(mdev); if (dd == dev_size_error) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6ea0a4b51ece..d9f2109fd9e6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -333,7 +333,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, if (!page) goto fail; - INIT_HLIST_NODE(&e->colision); + INIT_HLIST_NODE(&e->collision); e->epoch = NULL; e->mdev = mdev; e->pages = page; @@ -356,7 +356,7 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i kfree(e->digest); drbd_pp_free(mdev, e->pages, is_net); D_ASSERT(atomic_read(&e->pending_bios) == 0); - D_ASSERT(hlist_unhashed(&e->colision)); + D_ASSERT(hlist_unhashed(&e->collision)); mempool_free(e, drbd_ee_mempool); } @@ -1413,7 +1413,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u sector_t sector = e->sector; int ok; - D_ASSERT(hlist_unhashed(&e->colision)); + D_ASSERT(hlist_unhashed(&e->collision)); if (likely((e->flags & EE_WAS_ERROR) == 0)) { drbd_set_in_sync(mdev, sector, e->size); @@ -1482,7 +1482,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi return false; } - /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid + /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid * special casing it there for the various failure cases. * still no race with drbd_fail_pending_reads */ ok = recv_dless_read(mdev, req, sector, data_size); @@ -1553,11 +1553,11 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ if (mdev->net_conf->two_primaries) { spin_lock_irq(&mdev->req_lock); - D_ASSERT(!hlist_unhashed(&e->colision)); - hlist_del_init(&e->colision); + D_ASSERT(!hlist_unhashed(&e->collision)); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); } else { - D_ASSERT(hlist_unhashed(&e->colision)); + D_ASSERT(hlist_unhashed(&e->collision)); } drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); @@ -1574,8 +1574,8 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); spin_lock_irq(&mdev->req_lock); - D_ASSERT(!hlist_unhashed(&e->colision)); - hlist_del_init(&e->colision); + D_ASSERT(!hlist_unhashed(&e->collision)); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); dec_unacked(mdev); @@ -1750,7 +1750,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned spin_lock_irq(&mdev->req_lock); - hlist_add_head(&e->colision, ee_hash_slot(mdev, sector)); + hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); #define OVERLAPS overlaps(i->sector, i->size, sector, size) slot = tl_hash_slot(mdev, sector); @@ -1760,7 +1760,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int have_conflict = 0; prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); - hlist_for_each_entry(i, n, slot, colision) { + hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { /* only ALERT on first iteration, * we may be woken up early... */ @@ -1799,7 +1799,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } if (signal_pending(current)) { - hlist_del_init(&e->colision); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); @@ -1857,7 +1857,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned dev_err(DEV, "submit failed, triggering re-connect\n"); spin_lock_irq(&mdev->req_lock); list_del(&e->w.list); - hlist_del_init(&e->colision); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); if (e->flags & EE_CALL_AL_COMPLETE_IO) drbd_al_complete_io(mdev, e->sector); @@ -2988,7 +2988,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned ddsf = be16_to_cpu(p->dds_flags); if (get_ldev(mdev)) { - dd = drbd_determin_dev_size(mdev, ddsf); + dd = drbd_determine_dev_size(mdev, ddsf); put_ldev(mdev); if (dd == dev_size_error) return false; @@ -4261,7 +4261,7 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, struct hlist_node *n; struct drbd_request *req; - hlist_for_each_entry(req, n, slot, colision) { + hlist_for_each_entry(req, n, slot, collision) { if ((unsigned long)req == (unsigned long)id) { if (req->sector != sector) { dev_err(DEV, "_ack_id_to_req: found req %p but it has " diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 14645bd40092..3424d675b769 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -163,7 +163,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, * they must have been failed on the spot */ #define OVERLAPS overlaps(sector, size, i->sector, i->size) slot = tl_hash_slot(mdev, sector); - hlist_for_each_entry(i, n, slot, colision) { + hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " "other: %p %llus +%u\n", @@ -187,7 +187,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, #undef OVERLAPS #define OVERLAPS overlaps(sector, size, e->sector, e->size) slot = ee_hash_slot(mdev, req->sector); - hlist_for_each_entry(e, n, slot, colision) { + hlist_for_each_entry(e, n, slot, collision) { if (OVERLAPS) { wake_up(&mdev->misc_wait); break; @@ -260,8 +260,8 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) /* remove the request from the conflict detection * respective block_id verification hash */ - if (!hlist_unhashed(&req->colision)) - hlist_del(&req->colision); + if (!hlist_unhashed(&req->collision)) + hlist_del(&req->collision); else D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); @@ -329,7 +329,7 @@ static int _req_conflicts(struct drbd_request *req) struct hlist_node *n; struct hlist_head *slot; - D_ASSERT(hlist_unhashed(&req->colision)); + D_ASSERT(hlist_unhashed(&req->collision)); if (!get_net_conf(mdev)) return 0; @@ -341,7 +341,7 @@ static int _req_conflicts(struct drbd_request *req) #define OVERLAPS overlaps(i->sector, i->size, sector, size) slot = tl_hash_slot(mdev, sector); - hlist_for_each_entry(i, n, slot, colision) { + hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { dev_alert(DEV, "%s[%u] Concurrent local write detected! " "[DISCARD L] new: %llus +%u; " @@ -359,7 +359,7 @@ static int _req_conflicts(struct drbd_request *req) #undef OVERLAPS #define OVERLAPS overlaps(e->sector, e->size, sector, size) slot = ee_hash_slot(mdev, sector); - hlist_for_each_entry(e, n, slot, colision) { + hlist_for_each_entry(e, n, slot, collision) { if (OVERLAPS) { dev_alert(DEV, "%s[%u] Concurrent remote write detected!" " [DISCARD L] new: %llus +%u; " @@ -491,7 +491,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* so we can verify the handle in the answer packet * corresponding hlist_del is in _req_may_be_done() */ - hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector)); + hlist_add_head(&req->collision, ar_hash_slot(mdev, req->sector)); set_bit(UNPLUG_REMOTE, &mdev->flags); @@ -507,7 +507,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* assert something? */ /* from drbd_make_request_common only */ - hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector)); + hlist_add_head(&req->collision, tl_hash_slot(mdev, req->sector)); /* corresponding hlist_del is in _req_may_be_done() */ /* NOTE diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 32e2c3e6a813..281342dca2c8 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -256,7 +256,7 @@ static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev, struct hlist_node *n; struct drbd_request *req; - hlist_for_each_entry(req, n, slot, colision) { + hlist_for_each_entry(req, n, slot, collision) { if ((unsigned long)req == (unsigned long)id) { D_ASSERT(req->sector == sector); return req; @@ -291,7 +291,7 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, req->epoch = 0; req->sector = bio_src->bi_sector; req->size = bio_src->bi_size; - INIT_HLIST_NODE(&req->colision); + INIT_HLIST_NODE(&req->collision); INIT_LIST_HEAD(&req->tl_requests); INIT_LIST_HEAD(&req->w.list); } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index b5e53695fd7e..4d76b06b6b20 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -126,7 +126,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo list_del(&e->w.list); /* has been on active_ee or sync_ee */ list_add_tail(&e->w.list, &mdev->done_ee); - /* No hlist_del_init(&e->colision) here, we did not send the Ack yet, + /* No hlist_del_init(&e->collision) here, we did not send the Ack yet, * neither did we wake possibly waiting conflicting requests. * done from "drbd_process_done_ee" within the appropriate w.cb * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */ @@ -840,7 +840,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) const int ratio = (t == 0) ? 0 : (t < 100000) ? ((s*100)/t) : (s/(t/100)); - dev_info(DEV, "%u %% had equal check sums, eliminated: %luK; " + dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; " "transferred %luK total %luK\n", ratio, Bit2KB(mdev->rs_same_csum), -- cgit v1.2.3 From 9b2f61aec73dc9e735e247fd720c673b30999e7c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 24 May 2011 10:27:38 +0200 Subject: drbd: fix warning Signed-off-by: Philipp Reisner --- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 259ca0b20961..515bcd948a43 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1342,7 +1342,7 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, ret = wait_event_interruptible(mdev->misc_wait, mdev->state.disk != D_FAILED); drbd_resume_io(mdev); - if (retcode == SS_IS_DISKLESS) + if ((int)retcode == (int)SS_IS_DISKLESS) retcode = SS_NOTHING_TO_DO; if (ret) retcode = ERR_INTR; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d9f2109fd9e6..25d32c5aa50a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2911,12 +2911,6 @@ disconnect: return false; } -static void drbd_setup_order_type(struct drbd_conf *mdev, int peer) -{ - /* sorry, we currently have no working implementation - * of distributed TCQ */ -} - /* warn if the arguments differ by more than 12.5% */ static void warn_if_differ_considerably(struct drbd_conf *mdev, const char *s, sector_t a, sector_t b) -- cgit v1.2.3 From 0ddf72be4edbd7640b57c13161f71416df16ec11 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 23 May 2011 15:29:32 -0700 Subject: drbd: fix warning In file included from drivers/block/drbd/drbd_main.c:54: drivers/block/drbd/drbd_int.h:1190: warning: parameter has incomplete type Forward declarations of enums do not work. Fix it unpleasantly by moving the prototype. Cc: Jens Axboe Signed-off-by: Lars Ellenberg Signed-off-by: Philipp Reisner Signed-off-by: Andrew Morton --- drivers/block/drbd/drbd_int.h | 2 -- drivers/block/drbd/drbd_req.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 7952eb90d17f..b127f8d25b0b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1219,8 +1219,6 @@ extern void drbd_free_resources(struct drbd_conf *mdev); extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, unsigned int set_size); extern void tl_clear(struct drbd_conf *mdev); -enum drbd_req_event; -extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); extern void drbd_free_sock(struct drbd_conf *mdev); extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 281342dca2c8..68a234a5fdc5 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -323,6 +323,7 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, extern void complete_master_bio(struct drbd_conf *mdev, struct bio_and_error *m); extern void request_timer_fn(unsigned long data); +extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); /* use this if you don't want to deal with calling complete_master_bio() * outside the spinlock, e.g. when walking some list on cleanup. */ -- cgit v1.2.3 From 78f4bb367fd147a0e7e3998ba6e47109999d8814 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 24 May 2011 16:48:54 +0200 Subject: loop: limit 'max_part' module param to DISK_MAX_PARTS The 'max_part' parameter controls the number of maximum partition a loop block device can have. However if a user specifies very large value it would exceed the limitation of device minor number and can cause a kernel panic (or, at least, produce invalid device nodes in some cases). On my desktop system, following command kills the kernel. On qemu, it triggers similar oops but the kernel was alive: $ sudo modprobe loop max_part0000 ------------[ cut here ]------------ kernel BUG at /media/Linux_Data/project/linux/fs/sysfs/group.c:65! invalid opcode: 0000 [#1] SMP last sysfs file: CPU 0 Modules linked in: loop(+) Pid: 43, comm: insmod Tainted: G W 2.6.39-qemu+ #155 Bochs Bochs RIP: 0010:[] [] internal_create_group= +0x2a/0x170 RSP: 0018:ffff880007b3fde8 EFLAGS: 00000246 RAX: 00000000ffffffef RBX: ffff880007b3d878 RCX: 00000000000007b4 RDX: ffffffff8152da50 RSI: 0000000000000000 RDI: ffff880007b3d878 RBP: ffff880007b3fe38 R08: ffff880007b3fde8 R09: 0000000000000000 R10: ffff88000783b4a8 R11: ffff880007b3d878 R12: ffffffff8152da50 R13: ffff880007b3d868 R14: 0000000000000000 R15: ffff880007b3d800 FS: 0000000002137880(0063) GS:ffff880007c00000(0000) knlGS:00000000000000= 00 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000422680 CR3: 0000000007b50000 CR4: 00000000000006b0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 0000000000000000 DR7: 0000000000000000 Process insmod (pid: 43, threadinfo ffff880007b3e000, task ffff880007afb9c= 0) Stack: ffff880007b3fe58 ffffffff811e66dd ffff880007b3fe58 ffffffff811e570b 0000000000000010 ffff880007b3d800 ffff880007a7b390 ffff880007b3d868 0000000000400920 ffff880007b3d800 ffff880007b3fe48 ffffffff8113cfc8 Call Trace: [] ? device_add+0x4bc/0x5af [] ? dev_set_name+0x3c/0x3e [] sysfs_create_group+0xe/0x12 [] blk_trace_init_sysfs+0x14/0x16 [] blk_register_queue+0x47/0xf7 [] add_disk+0xdf/0x290 [] loop_init+0xeb/0x1b8 [loop] [] ? 0xffffffffa0005fff [] do_one_initcall+0x7a/0x12e [] sys_init_module+0x9c/0x1e0 [] system_call_fastpath+0x16/0x1b Code: c3 55 48 89 e5 41 57 41 56 41 89 f6 41 55 41 54 49 89 d4 53 48 89 fb= 48 83 ec 28 48 85 ff 74 0b 85 f6 75 0b 48 83 7f 30 00 75 14 <0f> 0b eb fe = 48 83 7f 30 00 b9 ea ff ff ff 0f 84 18 01 00 00 49 RIP [] internal_create_group+0x2a/0x170 RSP ---[ end trace a123eb592043acad ]--- Signed-off-by: Namhyung Kim Cc: Laurent Vivier Cc: stable@kernel.org Signed-off-by: Jens Axboe --- drivers/block/loop.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a076a14ca72d..cbf7052d1dd5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1691,6 +1691,9 @@ static int __init loop_init(void) if (max_part > 0) part_shift = fls(max_part); + if ((1UL << part_shift) > DISK_MAX_PARTS) + return -EINVAL; + if (max_loop > 1UL << (MINORBITS - part_shift)) return -EINVAL; -- cgit v1.2.3 From a1c15c59feee36267c43142a41152fbf7402afb6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 24 May 2011 16:48:55 +0200 Subject: loop: handle on-demand devices correctly When finding or allocating a loop device, loop_probe() did not take partition numbers into account so that it can result to a different device. Consider following example: $ sudo modprobe loop max_part=15 $ ls -l /dev/loop* brw-rw---- 1 root disk 7, 0 2011-05-24 22:16 /dev/loop0 brw-rw---- 1 root disk 7, 16 2011-05-24 22:16 /dev/loop1 brw-rw---- 1 root disk 7, 32 2011-05-24 22:16 /dev/loop2 brw-rw---- 1 root disk 7, 48 2011-05-24 22:16 /dev/loop3 brw-rw---- 1 root disk 7, 64 2011-05-24 22:16 /dev/loop4 brw-rw---- 1 root disk 7, 80 2011-05-24 22:16 /dev/loop5 brw-rw---- 1 root disk 7, 96 2011-05-24 22:16 /dev/loop6 brw-rw---- 1 root disk 7, 112 2011-05-24 22:16 /dev/loop7 $ sudo mknod /dev/loop8 b 7 128 $ sudo losetup /dev/loop8 ~/temp/disk-with-3-parts.img $ sudo losetup -a /dev/loop128: [0805]:278201 (/home/namhyung/temp/disk-with-3-parts.img) $ ls -l /dev/loop* brw-rw---- 1 root disk 7, 0 2011-05-24 22:16 /dev/loop0 brw-rw---- 1 root disk 7, 16 2011-05-24 22:16 /dev/loop1 brw-rw---- 1 root disk 7, 2048 2011-05-24 22:18 /dev/loop128 brw-rw---- 1 root disk 7, 2049 2011-05-24 22:18 /dev/loop128p1 brw-rw---- 1 root disk 7, 2050 2011-05-24 22:18 /dev/loop128p2 brw-rw---- 1 root disk 7, 2051 2011-05-24 22:18 /dev/loop128p3 brw-rw---- 1 root disk 7, 32 2011-05-24 22:16 /dev/loop2 brw-rw---- 1 root disk 7, 48 2011-05-24 22:16 /dev/loop3 brw-rw---- 1 root disk 7, 64 2011-05-24 22:16 /dev/loop4 brw-rw---- 1 root disk 7, 80 2011-05-24 22:16 /dev/loop5 brw-rw---- 1 root disk 7, 96 2011-05-24 22:16 /dev/loop6 brw-rw---- 1 root disk 7, 112 2011-05-24 22:16 /dev/loop7 brw-r--r-- 1 root root 7, 128 2011-05-24 22:17 /dev/loop8 After this patch, /dev/loop8 - instead of /dev/loop128 - was accessed correctly. In addition, 'range' passed to blk_register_region() should include all range of dev_t that LOOP_MAJOR can address. It does not need to be limited by partition numbers unless 'max_loop' param was specified. Signed-off-by: Namhyung Kim Cc: Laurent Vivier Cc: stable@kernel.org Signed-off-by: Jens Axboe --- drivers/block/loop.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index cbf7052d1dd5..c59a672a3de0 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1658,7 +1658,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) struct kobject *kobj; mutex_lock(&loop_devices_mutex); - lo = loop_init_one(dev & MINORMASK); + lo = loop_init_one(MINOR(dev) >> part_shift); kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM); mutex_unlock(&loop_devices_mutex); @@ -1699,10 +1699,10 @@ static int __init loop_init(void) if (max_loop) { nr = max_loop; - range = max_loop; + range = max_loop << part_shift; } else { nr = 8; - range = 1UL << (MINORBITS - part_shift); + range = 1UL << MINORBITS; } if (register_blkdev(LOOP_MAJOR, "loop")) @@ -1741,7 +1741,7 @@ static void __exit loop_exit(void) unsigned long range; struct loop_device *lo, *next; - range = max_loop ? max_loop : 1UL << (MINORBITS - part_shift); + range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; list_for_each_entry_safe(lo, next, &loop_devices, lo_list) loop_del_one(lo); -- cgit v1.2.3