diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-11 12:19:31 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-11 12:19:31 -0700 |
commit | d09ba13110e303d7baa29d170da94cd24f7662b2 (patch) | |
tree | 570b4d5e11889a6f951dee963c6549b2c8ca0293 /drivers/nvdimm/dimm_devs.c | |
parent | f29135b54bcbfe1fea97d94e2ae860bade1d5a31 (diff) | |
parent | e476f94482fc20a23b7b33b3d8e50f1953f71828 (diff) |
Merge tag 'libnvdimm-for-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
"Aside from the recently added pmem sub-division support these have
been in -next for several releases with no reported issues. The sub-
division support was included in next-20161010 with no reported
issues. It passes all unit tests including new tests for all the new
functionality below.
Summary:
- PMEM sub-division support: Allow a single PMEM region to be divided
into multiple namespaces. Originally, ~2 years ago, it was thought
that partitions of a /dev/pmemX block device could handle
sub-allocations of persistent memory for different use cases. With
the decision to not support DAX mappings of raw block-devices, and
the genesis of device-dax, the need for having multiple
pmem-namespace per region has grown.
- Device-DAX unified inode: In support of dynamic-resizing of a
device-dax instance the kernel arranges for all mappings of a
device-dax node to share the same inode. This allows unmap /
truncate / invalidation events to affect all instances of the
device similar to the behavior of mmap on block devices.
- Hardware error scrubbing reworks: The original address-range-scrub
and badblocks tracking solution allowed clearing entries at the
individual namespace level, but it failed to clear the internal
list of media errors maintained at the bus level. The result was
that the next scrub or namespace disable/re-enable event would
restore the cleared badblocks, but now that is fixed. The v4.8
kernel introduced an auto-scrub-on-machine-check behavior to
repopulate the badblocks list. Now, in v4.9, the auto-scrub
behavior can be disabled, in which case the kernel simply arranges
for the error reported in the machine-check to be added to the list.
- DIMM health-event notification support: ACPI 6.1 defines a
notification event code that can be sent to ACPI NVDIMM devices. A
poll(2) capable file descriptor for these events can be obtained
from the nmemX/nfit/flags sysfs-attribute of a libnvdimm memory
device.
- Miscellaneous fixes: NVDIMM-N probe error, device-dax build error,
and a change to dedup the flush hint list to not flush the memory
controller more than necessary"
* tag 'libnvdimm-for-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (39 commits)
/dev/dax: fix Kconfig dependency build breakage
dax: use correct dev_t value
dax: convert devm_create_dax_dev to PTR_ERR
libnvdimm, namespace: allow creation of multiple pmem-namespaces per region
libnvdimm, namespace: lift single pmem limit in scan_labels()
libnvdimm, namespace: filter out of range labels in scan_labels()
libnvdimm, namespace: enable allocation of multiple pmem namespaces
libnvdimm, namespace: update label implementation for multi-pmem
libnvdimm, namespace: expand pmem device naming scheme for multi-pmem
libnvdimm, region: update nd_region_available_dpa() for multi-pmem support
libnvdimm, namespace: sort namespaces by dpa at init
libnvdimm, namespace: allow multiple pmem-namespaces per region at scan time
tools/testing/nvdimm: support for sub-dividing a pmem region
libnvdimm, namespace: unify blk and pmem label scanning
libnvdimm, namespace: refactor uuid_show() into a namespace_to_uuid() helper
libnvdimm, label: convert label tracking to a linked list
libnvdimm, region: move region-mapping input-paramters to nd_mapping_desc
nvdimm: reduce duplicated wpq flushes
libnvdimm: clear the internal poison_list when clearing badblocks
pmem: reduce kmap_atomic sections to the memcpys only
...
Diffstat (limited to 'drivers/nvdimm/dimm_devs.c')
-rw-r--r-- | drivers/nvdimm/dimm_devs.c | 226 |
1 files changed, 175 insertions, 51 deletions
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index d9bba5edd8dc..d614493ad5ac 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -28,28 +28,30 @@ static DEFINE_IDA(dimm_ida); * Retrieve bus and dimm handle and return if this bus supports * get_config_data commands */ -static int __validate_dimm(struct nvdimm_drvdata *ndd) +int nvdimm_check_config_data(struct device *dev) { - struct nvdimm *nvdimm; - - if (!ndd) - return -EINVAL; - - nvdimm = to_nvdimm(ndd->dev); + struct nvdimm *nvdimm = to_nvdimm(dev); - if (!nvdimm->cmd_mask) - return -ENXIO; - if (!test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) - return -ENXIO; + if (!nvdimm->cmd_mask || + !test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) { + if (nvdimm->flags & NDD_ALIASING) + return -ENXIO; + else + return -ENOTTY; + } return 0; } static int validate_dimm(struct nvdimm_drvdata *ndd) { - int rc = __validate_dimm(ndd); + int rc; - if (rc && ndd) + if (!ndd) + return -EINVAL; + + rc = nvdimm_check_config_data(ndd->dev); + if (rc) dev_dbg(ndd->dev, "%pf: %s error: %d\n", __builtin_return_address(0), __func__, rc); return rc; @@ -263,6 +265,12 @@ const char *nvdimm_name(struct nvdimm *nvdimm) } EXPORT_SYMBOL_GPL(nvdimm_name); +struct kobject *nvdimm_kobj(struct nvdimm *nvdimm) +{ + return &nvdimm->dev.kobj; +} +EXPORT_SYMBOL_GPL(nvdimm_kobj); + unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm) { return nvdimm->cmd_mask; @@ -378,40 +386,166 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, } EXPORT_SYMBOL_GPL(nvdimm_create); +int alias_dpa_busy(struct device *dev, void *data) +{ + resource_size_t map_end, blk_start, new, busy; + struct blk_alloc_info *info = data; + struct nd_mapping *nd_mapping; + struct nd_region *nd_region; + struct nvdimm_drvdata *ndd; + struct resource *res; + int i; + + if (!is_nd_pmem(dev)) + return 0; + + nd_region = to_nd_region(dev); + for (i = 0; i < nd_region->ndr_mappings; i++) { + nd_mapping 
= &nd_region->mapping[i]; + if (nd_mapping->nvdimm == info->nd_mapping->nvdimm) + break; + } + + if (i >= nd_region->ndr_mappings) + return 0; + + ndd = to_ndd(nd_mapping); + map_end = nd_mapping->start + nd_mapping->size - 1; + blk_start = nd_mapping->start; + + /* + * In the allocation case ->res is set to free space that we are + * looking to validate against PMEM aliasing collision rules + * (i.e. BLK is allocated after all aliased PMEM). + */ + if (info->res) { + if (info->res->start >= nd_mapping->start + && info->res->start < map_end) + /* pass */; + else + return 0; + } + + retry: + /* + * Find the free dpa from the end of the last pmem allocation to + * the end of the interleave-set mapping that is not already + * covered by a blk allocation. + */ + busy = 0; + for_each_dpa_resource(ndd, res) { + if ((res->start >= blk_start && res->start < map_end) + || (res->end >= blk_start + && res->end <= map_end)) { + if (strncmp(res->name, "pmem", 4) == 0) { + new = max(blk_start, min(map_end + 1, + res->end + 1)); + if (new != blk_start) { + blk_start = new; + goto retry; + } + } else + busy += min(map_end, res->end) + - max(nd_mapping->start, res->start) + 1; + } else if (nd_mapping->start > res->start + && map_end < res->end) { + /* total eclipse of the PMEM region mapping */ + busy += nd_mapping->size; + break; + } + } + + /* update the free space range with the probed blk_start */ + if (info->res && blk_start > info->res->start) { + info->res->start = max(info->res->start, blk_start); + if (info->res->start > info->res->end) + info->res->end = info->res->start - 1; + return 1; + } + + info->available -= blk_start - nd_mapping->start + busy; + + return 0; +} + +static int blk_dpa_busy(struct device *dev, void *data) +{ + struct blk_alloc_info *info = data; + struct nd_mapping *nd_mapping; + struct nd_region *nd_region; + resource_size_t map_end; + int i; + + if (!is_nd_pmem(dev)) + return 0; + + nd_region = to_nd_region(dev); + for (i = 0; i < 
nd_region->ndr_mappings; i++) { + nd_mapping = &nd_region->mapping[i]; + if (nd_mapping->nvdimm == info->nd_mapping->nvdimm) + break; + } + + if (i >= nd_region->ndr_mappings) + return 0; + + map_end = nd_mapping->start + nd_mapping->size - 1; + if (info->res->start >= nd_mapping->start + && info->res->start < map_end) { + if (info->res->end <= map_end) { + info->busy = 0; + return 1; + } else { + info->busy -= info->res->end - map_end; + return 0; + } + } else if (info->res->end >= nd_mapping->start + && info->res->end <= map_end) { + info->busy -= nd_mapping->start - info->res->start; + return 0; + } else { + info->busy -= nd_mapping->size; + return 0; + } +} + /** * nd_blk_available_dpa - account the unused dpa of BLK region * @nd_mapping: container of dpa-resource-root + labels * - * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges. + * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges, but + * we arrange for them to never start at an lower dpa than the last + * PMEM allocation in an aliased region. 
*/ -resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping) +resource_size_t nd_blk_available_dpa(struct nd_region *nd_region) { + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); - resource_size_t map_end, busy = 0, available; + struct blk_alloc_info info = { + .nd_mapping = nd_mapping, + .available = nd_mapping->size, + .res = NULL, + }; struct resource *res; if (!ndd) return 0; - map_end = nd_mapping->start + nd_mapping->size - 1; - for_each_dpa_resource(ndd, res) - if (res->start >= nd_mapping->start && res->start < map_end) { - resource_size_t end = min(map_end, res->end); + device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy); - busy += end - res->start + 1; - } else if (res->end >= nd_mapping->start - && res->end <= map_end) { - busy += res->end - nd_mapping->start; - } else if (nd_mapping->start > res->start - && nd_mapping->start < res->end) { - /* total eclipse of the BLK region mapping */ - busy += nd_mapping->size; - } + /* now account for busy blk allocations in unaliased dpa */ + for_each_dpa_resource(ndd, res) { + if (strncmp(res->name, "blk", 3) != 0) + continue; - available = map_end - nd_mapping->start + 1; - if (busy < available) - return available - busy; - return 0; + info.res = res; + info.busy = resource_size(res); + device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy); + info.available -= info.busy; + } + + return info.available; } /** @@ -443,21 +577,16 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, map_start = nd_mapping->start; map_end = map_start + nd_mapping->size - 1; blk_start = max(map_start, map_end + 1 - *overlap); - for_each_dpa_resource(ndd, res) + for_each_dpa_resource(ndd, res) { if (res->start >= map_start && res->start < map_end) { if (strncmp(res->name, "blk", 3) == 0) - blk_start = min(blk_start, res->start); - else if (res->start != map_start) { 
+ blk_start = min(blk_start, + max(map_start, res->start)); + else if (res->end > map_end) { reason = "misaligned to iset"; goto err; - } else { - if (busy) { - reason = "duplicate overlapping PMEM reservations?"; - goto err; - } + } else busy += resource_size(res); - continue; - } } else if (res->end >= map_start && res->end <= map_end) { if (strncmp(res->name, "blk", 3) == 0) { /* @@ -466,15 +595,14 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, * be used for BLK. */ blk_start = map_start; - } else { - reason = "misaligned to iset"; - goto err; - } + } else + busy += resource_size(res); } else if (map_start > res->start && map_start < res->end) { /* total eclipse of the mapping */ busy += nd_mapping->size; blk_start = map_start; } + } *overlap = map_end + 1 - blk_start; available = blk_start - map_start; @@ -483,10 +611,6 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, return 0; err: - /* - * Something is wrong, PMEM must align with the start of the - * interleave set, and there can only be one allocation per set. - */ nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason); return 0; } |