From 3bcebc5bac0935d662f30d317e33ffa660bebf93 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 11 Dec 2018 20:16:13 +0100 Subject: lightnvm: pblk: set conservative threshold for user writes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a worst-case scenario (random writes), OP% of sectors in each line will be invalid, and we will then need to move data out of 100/OP% lines to free a single line. So, to prevent the possibility of running out of lines, temporarily block user writes when there is less than 100/OP% free lines. Also ensure that pblk creation does not produce instances with insufficient over provisioning. Insufficient over-provising is not a problem on real hardware, but often an issue when running QEMU simulations (with few lines). 100 lines is enough to create a sane instance with the standard (11%) over provisioning. Signed-off-by: Hans Holmberg Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 13822594647c..f083130d9920 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -635,7 +635,7 @@ static unsigned int calc_emeta_len(struct pblk *pblk) return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]); } -static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) +static int pblk_set_provision(struct pblk *pblk, int nr_free_chks) { struct nvm_tgt_dev *dev = pblk->dev; struct pblk_line_mgmt *l_mg = &pblk->l_mg; @@ -643,23 +643,41 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) struct nvm_geo *geo = &dev->geo; sector_t provisioned; int sec_meta, blk_meta; + int minimum; if (geo->op == NVM_TARGET_DEFAULT_OP) pblk->op = PBLK_DEFAULT_OP; else pblk->op = geo->op; - provisioned = nr_free_blks; + minimum = pblk_get_min_chks(pblk); + provisioned = nr_free_chks; provisioned *= (100 - pblk->op); sector_div(provisioned, 100); - pblk->op_blks = nr_free_blks - provisioned; + if ((nr_free_chks - provisioned) < minimum) { + if (geo->op != NVM_TARGET_DEFAULT_OP) { + pblk_err(pblk, "OP too small to create a sane instance\n"); + return -EINTR; + } + + /* If the user did not specify an OP value, and PBLK_DEFAULT_OP + * is not enough, calculate and set sane value + */ + + provisioned = nr_free_chks - minimum; + pblk->op = (100 * minimum) / nr_free_chks; + pblk_info(pblk, "Default OP insufficient, adjusting OP to %d\n", + pblk->op); + } + + pblk->op_blks = nr_free_chks - provisioned; /* Internally pblk manages all free blocks, but all calculations based * on user capacity consider only provisioned blocks */ - pblk->rl.total_blocks = nr_free_blks; - pblk->rl.nr_secs = nr_free_blks * geo->clba; + pblk->rl.total_blocks = nr_free_chks; + pblk->rl.nr_secs = nr_free_chks * geo->clba; /* Consider sectors used for metadata */ sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; @@ -667,8 +685,10 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) pblk->capacity = (provisioned - blk_meta) * geo->clba; - atomic_set(&pblk->rl.free_blocks, nr_free_blks); - atomic_set(&pblk->rl.free_user_blocks, nr_free_blks); + atomic_set(&pblk->rl.free_blocks, nr_free_chks); + atomic_set(&pblk->rl.free_user_blocks, nr_free_chks); + + return 0; } static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line, @@ -984,7 +1004,7 @@ static int pblk_lines_init(struct pblk *pblk) struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line *line; void *chunk_meta; - long nr_free_chks = 0; + int nr_free_chks = 0; int i, ret; ret = pblk_line_meta_init(pblk); @@ -1031,7 +1051,9 @@ static int pblk_lines_init(struct pblk *pblk) goto fail_free_lines; } - pblk_set_provision(pblk, nr_free_chks); + ret = pblk_set_provision(pblk, nr_free_chks); + if (ret) + goto fail_free_lines; vfree(chunk_meta); return 0; -- cgit v1.2.3 From c9a1d640d519b40d00dac850d1f17a7df1954689 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 11 Dec 2018 20:16:14 +0100 Subject: lightnvm: pblk: remove unused macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ADDR_POOL_SIZE is not used anymore, so remove the macro. Signed-off-by: Hans Holmberg Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index f083130d9920..3219f335fce9 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -207,9 +207,6 @@ static int pblk_rwb_init(struct pblk *pblk) return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs); } -/* Minimum pages needed within a lun */ -#define ADDR_POOL_SIZE 64 - static int pblk_set_addrf_12(struct pblk *pblk, struct nvm_geo *geo, struct nvm_addrf_12 *dst) { -- cgit v1.2.3 From 0934ce87b588a3da657b41804bf07518103875a4 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 11 Dec 2018 20:16:15 +0100 Subject: lightnvm: pblk: fix pblk_lines_init error handling path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chunk metadata is allocated with vmalloc, so we need to use vfree to free it. Fixes: 090ee26fd512 ("lightnvm: use internal allocation for chunk log page") Signed-off-by: Hans Holmberg Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 3219f335fce9..0e37104de596 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1060,7 +1060,7 @@ fail_free_lines: pblk_line_meta_free(l_mg, &pblk->lines[i]); kfree(pblk->lines); fail_free_chunk_meta: - kfree(chunk_meta); + vfree(chunk_meta); fail_free_luns: kfree(pblk->luns); fail_free_meta: -- cgit v1.2.3 From 42bd0384d77ef7552954056928018f5cfa91a013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Tue, 11 Dec 2018 20:16:21 +0100 Subject: lightnvm: pblk: avoid ref warning on cache creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current kref implementation around pblk global caches triggers a false positive on refcount_inc_checked() (when called) as the kref is initialized to 0. Instead of usint kref_inc() on a 0 reference, which is in principle correct, use kref_init() to avoid the check. This is also more explicit about what actually happens on cache creation. In the process, do a small refactoring to use kref helpers. Fixes: 1864de94ec9d6 "lightnvm: pblk: stop recreating global caches" Signed-off-by: Javier González Reviewed-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 0e37104de596..72ad3e70318c 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -347,23 +347,19 @@ fail_destroy_ws: static int pblk_get_global_caches(void) { - int ret; + int ret = 0; mutex_lock(&pblk_caches.mutex); - if (kref_read(&pblk_caches.kref) > 0) { - kref_get(&pblk_caches.kref); - mutex_unlock(&pblk_caches.mutex); - return 0; - } + if (kref_get_unless_zero(&pblk_caches.kref)) + goto out; ret = pblk_create_global_caches(); - if (!ret) - kref_get(&pblk_caches.kref); + kref_init(&pblk_caches.kref); +out: mutex_unlock(&pblk_caches.mutex); - return ret; } -- cgit v1.2.3 From faa79f27f0a46cd6c3ac3de5e7f3e142598217fc Mon Sep 17 00:00:00 2001 From: Igor Konopko Date: Tue, 11 Dec 2018 20:16:23 +0100 Subject: lightnvm: pblk: add helpers for OOB metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk currently assumes that size of OOB metadata on drive is always equal to size of pblk_sec_meta struct. This commit add helpers which will allow to handle different sizes of OOB metadata on drive in the future. After this patch only OOB metadata equal to 16 bytes is supported. Reviewed-by: Javier González Signed-off-by: Igor Konopko Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 72ad3e70318c..33361bfb85c3 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -405,6 +405,12 @@ static int pblk_core_init(struct pblk *pblk) queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT)); pblk_set_sec_per_write(pblk, pblk->min_write_pgs); + pblk->oob_meta_size = geo->sos; + if (pblk->oob_meta_size != sizeof(struct pblk_sec_meta)) { + pblk_err(pblk, "Unsupported metadata size\n"); + return -EINVAL; + } + pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t), GFP_KERNEL); if (!pblk->pad_dist) -- cgit v1.2.3 From 24828d0536bbedc9b265f2b01ffca99de3f6a7c7 Mon Sep 17 00:00:00 2001 From: Igor Konopko Date: Tue, 11 Dec 2018 20:16:24 +0100 Subject: lightnvm: dynamic DMA pool entry size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently lightnvm and pblk uses single DMA pool, for which the entry size always is equal to PAGE_SIZE. The contents of each entry allocated from the DMA pool consists of a PPA list (8bytes * 64), leaving 56bytes * 64 space for metadata. Since the metadata field can be bigger, such as 128 bytes, the static size does not cover this use-case. This patch adds support for I/O metadata above 56 bytes by changing DMA pool size based on device meta size and allows pblk to use OOB metadata >=16B. Reviewed-by: Javier González Signed-off-by: Igor Konopko Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 33361bfb85c3..ff6a6df369c3 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -406,7 +406,7 @@ static int pblk_core_init(struct pblk *pblk) pblk_set_sec_per_write(pblk, pblk->min_write_pgs); pblk->oob_meta_size = geo->sos; - if (pblk->oob_meta_size != sizeof(struct pblk_sec_meta)) { + if (pblk->oob_meta_size < sizeof(struct pblk_sec_meta)) { pblk_err(pblk, "Unsupported metadata size\n"); return -EINVAL; } -- cgit v1.2.3 From a16816b9e462e8ee86a908606bde54b53cfeca80 Mon Sep 17 00:00:00 2001 From: Igor Konopko Date: Tue, 11 Dec 2018 20:16:25 +0100 Subject: lightnvm: disable interleaved metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently pblk only check the size of I/O metadata and does not take into account if this metadata is in a separate buffer or interleaved in a single metadata buffer. In reality only the first scenario is supported, where second mode will break pblk functionality during any IO operation. This patch prevents pblk to be instantiated in case device only supports interleaved metadata. Reviewed-by: Javier González Signed-off-by: Igor Konopko Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index ff6a6df369c3..e8055b796381 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1175,6 +1175,12 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, return ERR_PTR(-EINVAL); } + if (geo->ext) { + pblk_err(pblk, "extended metadata not supported\n"); + kfree(pblk); + return ERR_PTR(-EINVAL); + } + spin_lock_init(&pblk->resubmit_lock); spin_lock_init(&pblk->trans_lock); spin_lock_init(&pblk->lock); -- cgit v1.2.3 From 55d8ec35398e7ab001989473cf6ed6f40b5ef4a6 Mon Sep 17 00:00:00 2001 From: Igor Konopko Date: Tue, 11 Dec 2018 20:16:26 +0100 Subject: lightnvm: pblk: support packed metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk performs recovery of open lines by storing the LBA in the per LBA metadata field. Recovery therefore only works for drives that has this field. This patch adds support for packed metadata, which store l2p mapping for open lines in last sector of every write unit and enables drives without per IO metadata to recover open lines. After this patch, drives with OOB size <16B will use packed metadata and metadata size larger than16B will continue to use the device per IO metadata. Reviewed-by: Javier González Signed-off-by: Igor Konopko Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index e8055b796381..f9a3e47b6a93 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -399,6 +399,7 @@ static int pblk_core_init(struct pblk *pblk) pblk->nr_flush_rst = 0; pblk->min_write_pgs = geo->ws_opt; + pblk->min_write_pgs_data = pblk->min_write_pgs; max_write_ppas = pblk->min_write_pgs * geo->all_luns; pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA); pblk->max_write_pgs = min_t(int, pblk->max_write_pgs, @@ -406,9 +407,35 @@ static int pblk_core_init(struct pblk *pblk) pblk_set_sec_per_write(pblk, pblk->min_write_pgs); pblk->oob_meta_size = geo->sos; - if (pblk->oob_meta_size < sizeof(struct pblk_sec_meta)) { - pblk_err(pblk, "Unsupported metadata size\n"); - return -EINVAL; + if (!pblk_is_oob_meta_supported(pblk)) { + /* For drives which does not have OOB metadata feature + * in order to support recovery feature we need to use + * so called packed metadata. Packed metada will store + * the same information as OOB metadata (l2p table mapping, + * but in the form of the single page at the end of + * every write request. + */ + if (pblk->min_write_pgs + * sizeof(struct pblk_sec_meta) > PAGE_SIZE) { + /* We want to keep all the packed metadata on single + * page per write requests. So we need to ensure that + * it will fit. + * + * This is more like sanity check, since there is + * no device with such a big minimal write size + * (above 1 metabytes). + */ + pblk_err(pblk, "Not supported min write size\n"); + return -EINVAL; + } + /* For packed meta approach we do some simplification. + * On read path we always issue requests which size + * equal to max_write_pgs, with all pages filled with + * user payload except of last one page which will be + * filled with packed metadata. + */ + pblk->max_write_pgs = pblk->min_write_pgs; + pblk->min_write_pgs_data = pblk->min_write_pgs - 1; } pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t), @@ -641,7 +668,7 @@ static int pblk_set_provision(struct pblk *pblk, int nr_free_chks) struct pblk_line_meta *lm = &pblk->lm; struct nvm_geo *geo = &dev->geo; sector_t provisioned; - int sec_meta, blk_meta; + int sec_meta, blk_meta, clba; int minimum; if (geo->op == NVM_TARGET_DEFAULT_OP) @@ -682,7 +709,8 @@ static int pblk_set_provision(struct pblk *pblk, int nr_free_chks) sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; blk_meta = DIV_ROUND_UP(sec_meta, geo->clba); - pblk->capacity = (provisioned - blk_meta) * geo->clba; + clba = (geo->clba / pblk->min_write_pgs) * pblk->min_write_pgs_data; + pblk->capacity = (provisioned - blk_meta) * clba; atomic_set(&pblk->rl.free_blocks, nr_free_chks); atomic_set(&pblk->rl.free_user_blocks, nr_free_chks); -- cgit v1.2.3