diff options
Diffstat (limited to 'drivers/nvdimm/blk.c')
-rw-r--r-- | drivers/nvdimm/blk.c | 384 |
1 files changed, 384 insertions, 0 deletions
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c new file mode 100644 index 000000000000..4f97b248c236 --- /dev/null +++ b/drivers/nvdimm/blk.c @@ -0,0 +1,384 @@ +/* + * NVDIMM Block Window Driver + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/blkdev.h> +#include <linux/fs.h> +#include <linux/genhd.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/nd.h> +#include <linux/sizes.h> +#include "nd.h" + +struct nd_blk_device { + struct request_queue *queue; + struct gendisk *disk; + struct nd_namespace_blk *nsblk; + struct nd_blk_region *ndbr; + size_t disk_size; + u32 sector_size; + u32 internal_lbasize; +}; + +static int nd_blk_major; + +static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev) +{ + return blk_dev->nsblk->lbasize - blk_dev->sector_size; +} + +static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk, + resource_size_t ns_offset, unsigned int len) +{ + int i; + + for (i = 0; i < nsblk->num_resources; i++) { + if (ns_offset < resource_size(nsblk->res[i])) { + if (ns_offset + len > resource_size(nsblk->res[i])) { + dev_WARN_ONCE(&nsblk->common.dev, 1, + "illegal request\n"); + return SIZE_MAX; + } + return nsblk->res[i]->start + ns_offset; + } + ns_offset -= resource_size(nsblk->res[i]); + } + + dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n"); + return SIZE_MAX; +} + +#ifdef CONFIG_BLK_DEV_INTEGRITY +static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, + struct bio_integrity_payload *bip, u64 lba, + int rw) +{ + unsigned int len = nd_blk_meta_size(blk_dev); + resource_size_t dev_offset, ns_offset; + struct nd_namespace_blk *nsblk; + struct nd_blk_region *ndbr; + int err = 0; + + nsblk = blk_dev->nsblk; + ndbr = blk_dev->ndbr; + ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size; + dev_offset = to_dev_offset(nsblk, ns_offset, len); + if (dev_offset == SIZE_MAX) + return -EIO; + + while (len) { + unsigned int cur_len; + struct bio_vec bv; + void *iobuf; + + bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); + /* + * The 'bv' obtained from bvec_iter_bvec has its .bv_len and + * .bv_offset already adjusted for iter->bi_bvec_done, and we + * can use those directly + */ + + cur_len = min(len, bv.bv_len); + iobuf = kmap_atomic(bv.bv_page); + err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset, + cur_len, rw); + kunmap_atomic(iobuf); + if (err) + return err; + + len -= cur_len; + dev_offset += cur_len; + bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len); + } + + return err; +} + +#else /* CONFIG_BLK_DEV_INTEGRITY */ +static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, + struct bio_integrity_payload *bip, u64 lba, + int rw) +{ + return 0; +} +#endif + +static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, + struct bio_integrity_payload *bip, struct page *page, + unsigned int len, unsigned int off, int rw, + sector_t sector) +{ + struct nd_blk_region *ndbr = blk_dev->ndbr; + resource_size_t dev_offset, ns_offset; + int err = 0; + void *iobuf; + u64 lba; + + while (len) { + unsigned int cur_len; + + /* + * If we don't have an integrity payload, we don't have to + * split the bvec into sectors, as this would cause unnecessary + * Block Window setup/move steps. the do_io routine is capable + * of handling len <= PAGE_SIZE. + */ + cur_len = bip ? min(len, blk_dev->sector_size) : len; + + lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size); + ns_offset = lba * blk_dev->internal_lbasize; + dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len); + if (dev_offset == SIZE_MAX) + return -EIO; + + iobuf = kmap_atomic(page); + err = ndbr->do_io(ndbr, dev_offset, iobuf + off, cur_len, rw); + kunmap_atomic(iobuf); + if (err) + return err; + + if (bip) { + err = nd_blk_rw_integrity(blk_dev, bip, lba, rw); + if (err) + return err; + } + len -= cur_len; + off += cur_len; + sector += blk_dev->sector_size >> SECTOR_SHIFT; + } + + return err; +} + +static void nd_blk_make_request(struct request_queue *q, struct bio *bio) +{ + struct block_device *bdev = bio->bi_bdev; + struct gendisk *disk = bdev->bd_disk; + struct bio_integrity_payload *bip; + struct nd_blk_device *blk_dev; + struct bvec_iter iter; + unsigned long start; + struct bio_vec bvec; + int err = 0, rw; + bool do_acct; + + /* + * bio_integrity_enabled also checks if the bio already has an + * integrity payload attached. If it does, we *don't* do a + * bio_integrity_prep here - the payload has been generated by + * another kernel subsystem, and we just pass it through. + */ + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { + err = -EIO; + goto out; + } + + bip = bio_integrity(bio); + blk_dev = disk->private_data; + rw = bio_data_dir(bio); + do_acct = nd_iostat_start(bio, &start); + bio_for_each_segment(bvec, bio, iter) { + unsigned int len = bvec.bv_len; + + BUG_ON(len > PAGE_SIZE); + err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len, + bvec.bv_offset, rw, iter.bi_sector); + if (err) { + dev_info(&blk_dev->nsblk->common.dev, + "io error in %s sector %lld, len %d,\n", + (rw == READ) ? "READ" : "WRITE", + (unsigned long long) iter.bi_sector, len); + break; + } + } + if (do_acct) + nd_iostat_end(bio, start); + + out: + bio_endio(bio, err); +} + +static int nd_blk_rw_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *iobuf, size_t n, int rw) +{ + struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim); + struct nd_namespace_blk *nsblk = blk_dev->nsblk; + struct nd_blk_region *ndbr = blk_dev->ndbr; + resource_size_t dev_offset; + + dev_offset = to_dev_offset(nsblk, offset, n); + + if (unlikely(offset + n > blk_dev->disk_size)) { + dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); + return -EFAULT; + } + + if (dev_offset == SIZE_MAX) + return -EIO; + + return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw); +} + +static const struct block_device_operations nd_blk_fops = { + .owner = THIS_MODULE, + .revalidate_disk = nvdimm_revalidate_disk, +}; + +static int nd_blk_attach_disk(struct nd_namespace_common *ndns, + struct nd_blk_device *blk_dev) +{ + resource_size_t available_disk_size; + struct gendisk *disk; + u64 internal_nlba; + + internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize); + available_disk_size = internal_nlba * blk_dev->sector_size; + + blk_dev->queue = blk_alloc_queue(GFP_KERNEL); + if (!blk_dev->queue) + return -ENOMEM; + + blk_queue_make_request(blk_dev->queue, nd_blk_make_request); + blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX); + blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY); + blk_queue_logical_block_size(blk_dev->queue, blk_dev->sector_size); + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue); + + disk = blk_dev->disk = alloc_disk(0); + if (!disk) { + blk_cleanup_queue(blk_dev->queue); + return -ENOMEM; + } + + disk->driverfs_dev = &ndns->dev; + disk->major = nd_blk_major; + disk->first_minor = 0; + disk->fops = &nd_blk_fops; + disk->private_data = blk_dev; + disk->queue = blk_dev->queue; + disk->flags = GENHD_FL_EXT_DEVT; + nvdimm_namespace_disk_name(ndns, disk->disk_name); + set_capacity(disk, 0); + add_disk(disk); + + if (nd_blk_meta_size(blk_dev)) { + int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev)); + + if (rc) { + del_gendisk(disk); + put_disk(disk); + blk_cleanup_queue(blk_dev->queue); + return rc; + } + } + + set_capacity(disk, available_disk_size >> SECTOR_SHIFT); + revalidate_disk(disk); + return 0; +} + +static int nd_blk_probe(struct device *dev) +{ + struct nd_namespace_common *ndns; + struct nd_namespace_blk *nsblk; + struct nd_blk_device *blk_dev; + int rc; + + ndns = nvdimm_namespace_common_probe(dev); + if (IS_ERR(ndns)) + return PTR_ERR(ndns); + + blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL); + if (!blk_dev) + return -ENOMEM; + + nsblk = to_nd_namespace_blk(&ndns->dev); + blk_dev->disk_size = nvdimm_namespace_capacity(ndns); + blk_dev->ndbr = to_nd_blk_region(dev->parent); + blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev); + blk_dev->internal_lbasize = roundup(nsblk->lbasize, + INT_LBASIZE_ALIGNMENT); + blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512); + dev_set_drvdata(dev, blk_dev); + + ndns->rw_bytes = nd_blk_rw_bytes; + if (is_nd_btt(dev)) + rc = nvdimm_namespace_attach_btt(ndns); + else if (nd_btt_probe(ndns, blk_dev) == 0) { + /* we'll come back as btt-blk */ + rc = -ENXIO; + } else + rc = nd_blk_attach_disk(ndns, blk_dev); + if (rc) + kfree(blk_dev); + return rc; +} + +static void nd_blk_detach_disk(struct nd_blk_device *blk_dev) +{ + del_gendisk(blk_dev->disk); + put_disk(blk_dev->disk); + blk_cleanup_queue(blk_dev->queue); +} + +static int nd_blk_remove(struct device *dev) +{ + struct nd_blk_device *blk_dev = dev_get_drvdata(dev); + + if (is_nd_btt(dev)) + nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); + else + nd_blk_detach_disk(blk_dev); + kfree(blk_dev); + + return 0; +} + +static struct nd_device_driver nd_blk_driver = { + .probe = nd_blk_probe, + .remove = nd_blk_remove, + .drv = { + .name = "nd_blk", + }, + .type = ND_DRIVER_NAMESPACE_BLK, +}; + +static int __init nd_blk_init(void) +{ + int rc; + + rc = register_blkdev(0, "nd_blk"); + if (rc < 0) + return rc; + + nd_blk_major = rc; + rc = nd_driver_register(&nd_blk_driver); + + if (rc < 0) + unregister_blkdev(nd_blk_major, "nd_blk"); + + return rc; +} + +static void __exit nd_blk_exit(void) +{ + driver_unregister(&nd_blk_driver.drv); + unregister_blkdev(nd_blk_major, "nd_blk"); +} + +MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK); +module_init(nd_blk_init); +module_exit(nd_blk_exit); |