author    Linus Torvalds <torvalds@linux-foundation.org>  2016-10-11 12:19:31 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2016-10-11 12:19:31 -0700
commit    d09ba13110e303d7baa29d170da94cd24f7662b2 (patch)
tree      570b4d5e11889a6f951dee963c6549b2c8ca0293 /drivers/nvdimm/dimm_devs.c
parent    f29135b54bcbfe1fea97d94e2ae860bade1d5a31 (diff)
parent    e476f94482fc20a23b7b33b3d8e50f1953f71828 (diff)
Merge tag 'libnvdimm-for-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
 "Aside from the recently added pmem sub-division support, these have
  been in -next for several releases with no reported issues. The
  sub-division support was included in next-20161010 with no reported
  issues. It passes all unit tests, including new tests for all the new
  functionality below.

  Summary:

   - PMEM sub-division support: allow a single PMEM region to be
     divided into multiple namespaces. Originally, ~2 years ago, it was
     thought that partitions of a /dev/pmemX block device could handle
     sub-allocations of persistent memory for different use cases. With
     the decision to not support DAX mappings of raw block devices, and
     the genesis of device-dax, the need for multiple pmem-namespaces
     per region has grown.

   - Device-DAX unified inode: in support of dynamic resizing of a
     device-dax instance, the kernel arranges for all mappings of a
     device-dax node to share the same inode. This allows unmap /
     truncate / invalidation events to affect all instances of the
     device, similar to the behavior of mmap on block devices.

   - Hardware error scrubbing reworks: the original address-range-scrub
     and badblocks tracking solution allowed clearing entries at the
     individual namespace level, but it failed to clear the internal
     list of media errors maintained at the bus level. The result was
     that the next scrub or namespace disable/re-enable event would
     restore the cleared badblocks; now that is fixed. The v4.8 kernel
     introduced an auto-scrub-on-machine-check behavior to repopulate
     the badblocks list. Now, in v4.9, the auto-scrub behavior can be
     disabled, simply arranging for the error reported in the
     machine-check to be added to the list.

   - DIMM health-event notification support: ACPI 6.1 defines a
     notification event code that can be sent to ACPI NVDIMM devices. A
     poll(2) capable file descriptor for these events can be obtained
     from the nmemX/nfit/flags sysfs-attribute of a libnvdimm memory
     device.

   - Miscellaneous fixes: an NVDIMM-N probe error, a device-dax build
     error, and a change to dedup the flush hint list to avoid flushing
     the memory controller more than necessary"

* tag 'libnvdimm-for-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (39 commits)
  /dev/dax: fix Kconfig dependency build breakage
  dax: use correct dev_t value
  dax: convert devm_create_dax_dev to PTR_ERR
  libnvdimm, namespace: allow creation of multiple pmem-namespaces per region
  libnvdimm, namespace: lift single pmem limit in scan_labels()
  libnvdimm, namespace: filter out of range labels in scan_labels()
  libnvdimm, namespace: enable allocation of multiple pmem namespaces
  libnvdimm, namespace: update label implementation for multi-pmem
  libnvdimm, namespace: expand pmem device naming scheme for multi-pmem
  libnvdimm, region: update nd_region_available_dpa() for multi-pmem support
  libnvdimm, namespace: sort namespaces by dpa at init
  libnvdimm, namespace: allow multiple pmem-namespaces per region at scan time
  tools/testing/nvdimm: support for sub-dividing a pmem region
  libnvdimm, namespace: unify blk and pmem label scanning
  libnvdimm, namespace: refactor uuid_show() into a namespace_to_uuid() helper
  libnvdimm, label: convert label tracking to a linked list
  libnvdimm, region: move region-mapping input-paramters to nd_mapping_desc
  nvdimm: reduce duplicated wpq flushes
  libnvdimm: clear the internal poison_list when clearing badblocks
  pmem: reduce kmap_atomic sections to the memcpys only
  ...
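The health-event notification item above follows the standard sysfs notification pattern: read the attribute once to arm it, poll(2) for POLLPRI | POLLERR, then seek back and re-read. A minimal userspace sketch follows; the nmem0 path is illustrative, since the actual nmemX device name is system-specific:

	#include <fcntl.h>
	#include <poll.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		ssize_t n;
		/* hypothetical path; substitute the real nmemX device */
		int fd = open("/sys/bus/nd/devices/nmem0/nfit/flags", O_RDONLY);
		struct pollfd pfd;

		if (fd < 0)
			return 1;

		pfd.fd = fd;
		pfd.events = POLLPRI | POLLERR;

		/* an initial read arms the sysfs notification */
		n = read(fd, buf, sizeof(buf) - 1);

		while (poll(&pfd, 1, -1) > 0) {
			/* rewind and re-read to pick up the updated flags */
			lseek(fd, 0, SEEK_SET);
			n = read(fd, buf, sizeof(buf) - 1);
			if (n > 0) {
				buf[n] = '\0';
				printf("health event, flags: %s", buf);
			}
		}
		close(fd);
		return 0;
	}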
Diffstat (limited to 'drivers/nvdimm/dimm_devs.c')
-rw-r--r--  drivers/nvdimm/dimm_devs.c | 226
1 file changed, 175 insertions(+), 51 deletions(-)
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index d9bba5edd8dc..d614493ad5ac 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -28,28 +28,30 @@ static DEFINE_IDA(dimm_ida);
* Retrieve bus and dimm handle and return if this bus supports
* get_config_data commands
*/
-static int __validate_dimm(struct nvdimm_drvdata *ndd)
+int nvdimm_check_config_data(struct device *dev)
{
- struct nvdimm *nvdimm;
-
- if (!ndd)
- return -EINVAL;
-
- nvdimm = to_nvdimm(ndd->dev);
+ struct nvdimm *nvdimm = to_nvdimm(dev);
- if (!nvdimm->cmd_mask)
- return -ENXIO;
- if (!test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask))
- return -ENXIO;
+ if (!nvdimm->cmd_mask ||
+ !test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) {
+ if (nvdimm->flags & NDD_ALIASING)
+ return -ENXIO;
+ else
+ return -ENOTTY;
+ }
return 0;
}
static int validate_dimm(struct nvdimm_drvdata *ndd)
{
- int rc = __validate_dimm(ndd);
+ int rc;
- if (rc && ndd)
+ if (!ndd)
+ return -EINVAL;
+
+ rc = nvdimm_check_config_data(ndd->dev);
+ if (rc)
dev_dbg(ndd->dev, "%pf: %s error: %d\n",
__builtin_return_address(0), __func__, rc);
return rc;
@@ -263,6 +265,12 @@ const char *nvdimm_name(struct nvdimm *nvdimm)
}
EXPORT_SYMBOL_GPL(nvdimm_name);
+struct kobject *nvdimm_kobj(struct nvdimm *nvdimm)
+{
+ return &nvdimm->dev.kobj;
+}
+EXPORT_SYMBOL_GPL(nvdimm_kobj);
+
unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm)
{
return nvdimm->cmd_mask;
@@ -378,40 +386,166 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
}
EXPORT_SYMBOL_GPL(nvdimm_create);
+int alias_dpa_busy(struct device *dev, void *data)
+{
+ resource_size_t map_end, blk_start, new, busy;
+ struct blk_alloc_info *info = data;
+ struct nd_mapping *nd_mapping;
+ struct nd_region *nd_region;
+ struct nvdimm_drvdata *ndd;
+ struct resource *res;
+ int i;
+
+ if (!is_nd_pmem(dev))
+ return 0;
+
+ nd_region = to_nd_region(dev);
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ nd_mapping = &nd_region->mapping[i];
+ if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
+ break;
+ }
+
+ if (i >= nd_region->ndr_mappings)
+ return 0;
+
+ ndd = to_ndd(nd_mapping);
+ map_end = nd_mapping->start + nd_mapping->size - 1;
+ blk_start = nd_mapping->start;
+
+ /*
+ * In the allocation case ->res is set to free space that we are
+ * looking to validate against PMEM aliasing collision rules
+ * (i.e. BLK is allocated after all aliased PMEM).
+ */
+ if (info->res) {
+ if (info->res->start >= nd_mapping->start
+ && info->res->start < map_end)
+ /* pass */;
+ else
+ return 0;
+ }
+
+ retry:
+ /*
+ * Find the free dpa from the end of the last pmem allocation to
+ * the end of the interleave-set mapping that is not already
+ * covered by a blk allocation.
+ */
+ busy = 0;
+ for_each_dpa_resource(ndd, res) {
+ if ((res->start >= blk_start && res->start < map_end)
+ || (res->end >= blk_start
+ && res->end <= map_end)) {
+ if (strncmp(res->name, "pmem", 4) == 0) {
+ new = max(blk_start, min(map_end + 1,
+ res->end + 1));
+ if (new != blk_start) {
+ blk_start = new;
+ goto retry;
+ }
+ } else
+ busy += min(map_end, res->end)
+ - max(nd_mapping->start, res->start) + 1;
+ } else if (nd_mapping->start > res->start
+ && map_end < res->end) {
+ /* total eclipse of the PMEM region mapping */
+ busy += nd_mapping->size;
+ break;
+ }
+ }
+
+ /* update the free space range with the probed blk_start */
+ if (info->res && blk_start > info->res->start) {
+ info->res->start = max(info->res->start, blk_start);
+ if (info->res->start > info->res->end)
+ info->res->end = info->res->start - 1;
+ return 1;
+ }
+
+ info->available -= blk_start - nd_mapping->start + busy;
+
+ return 0;
+}
+
+static int blk_dpa_busy(struct device *dev, void *data)
+{
+ struct blk_alloc_info *info = data;
+ struct nd_mapping *nd_mapping;
+ struct nd_region *nd_region;
+ resource_size_t map_end;
+ int i;
+
+ if (!is_nd_pmem(dev))
+ return 0;
+
+ nd_region = to_nd_region(dev);
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ nd_mapping = &nd_region->mapping[i];
+ if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
+ break;
+ }
+
+ if (i >= nd_region->ndr_mappings)
+ return 0;
+
+ map_end = nd_mapping->start + nd_mapping->size - 1;
+ if (info->res->start >= nd_mapping->start
+ && info->res->start < map_end) {
+ if (info->res->end <= map_end) {
+ info->busy = 0;
+ return 1;
+ } else {
+ info->busy -= info->res->end - map_end;
+ return 0;
+ }
+ } else if (info->res->end >= nd_mapping->start
+ && info->res->end <= map_end) {
+ info->busy -= nd_mapping->start - info->res->start;
+ return 0;
+ } else {
+ info->busy -= nd_mapping->size;
+ return 0;
+ }
+}
+
/**
* nd_blk_available_dpa - account the unused dpa of BLK region
* @nd_mapping: container of dpa-resource-root + labels
*
- * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges.
+ * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges, but
+ * we arrange for them to never start at a lower dpa than the last
+ * PMEM allocation in an aliased region.
*/
-resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping)
+resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
{
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
+ struct nd_mapping *nd_mapping = &nd_region->mapping[0];
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
- resource_size_t map_end, busy = 0, available;
+ struct blk_alloc_info info = {
+ .nd_mapping = nd_mapping,
+ .available = nd_mapping->size,
+ .res = NULL,
+ };
struct resource *res;
if (!ndd)
return 0;
- map_end = nd_mapping->start + nd_mapping->size - 1;
- for_each_dpa_resource(ndd, res)
- if (res->start >= nd_mapping->start && res->start < map_end) {
- resource_size_t end = min(map_end, res->end);
+ device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
- busy += end - res->start + 1;
- } else if (res->end >= nd_mapping->start
- && res->end <= map_end) {
- busy += res->end - nd_mapping->start;
- } else if (nd_mapping->start > res->start
- && nd_mapping->start < res->end) {
- /* total eclipse of the BLK region mapping */
- busy += nd_mapping->size;
- }
+ /* now account for busy blk allocations in unaliased dpa */
+ for_each_dpa_resource(ndd, res) {
+ if (strncmp(res->name, "blk", 3) != 0)
+ continue;
- available = map_end - nd_mapping->start + 1;
- if (busy < available)
- return available - busy;
- return 0;
+ info.res = res;
+ info.busy = resource_size(res);
+ device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy);
+ info.available -= info.busy;
+ }
+
+ return info.available;
}
/**
@@ -443,21 +577,16 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
map_start = nd_mapping->start;
map_end = map_start + nd_mapping->size - 1;
blk_start = max(map_start, map_end + 1 - *overlap);
- for_each_dpa_resource(ndd, res)
+ for_each_dpa_resource(ndd, res) {
if (res->start >= map_start && res->start < map_end) {
if (strncmp(res->name, "blk", 3) == 0)
- blk_start = min(blk_start, res->start);
- else if (res->start != map_start) {
+ blk_start = min(blk_start,
+ max(map_start, res->start));
+ else if (res->end > map_end) {
reason = "misaligned to iset";
goto err;
- } else {
- if (busy) {
- reason = "duplicate overlapping PMEM reservations?";
- goto err;
- }
+ } else
busy += resource_size(res);
- continue;
- }
} else if (res->end >= map_start && res->end <= map_end) {
if (strncmp(res->name, "blk", 3) == 0) {
/*
@@ -466,15 +595,14 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
* be used for BLK.
*/
blk_start = map_start;
- } else {
- reason = "misaligned to iset";
- goto err;
- }
+ } else
+ busy += resource_size(res);
} else if (map_start > res->start && map_start < res->end) {
/* total eclipse of the mapping */
busy += nd_mapping->size;
blk_start = map_start;
}
+ }
*overlap = map_end + 1 - blk_start;
available = blk_start - map_start;
@@ -483,10 +611,6 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
return 0;
err:
- /*
- * Something is wrong, PMEM must align with the start of the
- * interleave set, and there can only be one allocation per set.
- */
nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason);
return 0;
}
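The accounting rule that alias_dpa_busy() and nd_blk_available_dpa() implement together is easier to see outside the kernel. Below is a toy, self-contained sketch (plain structs and a fixed array, none of the kernel's types): in an aliased region, BLK capacity begins only after the highest PMEM allocation (blk_start), and existing BLK allocations above that point are subtracted from what remains.

	#include <stdio.h>
	#include <string.h>

	struct res {
		const char *name;		/* "pmem-N" or "blk-N" */
		unsigned long long start, end;	/* inclusive dpa range */
	};

	int main(void)
	{
		/* one dimm-mapping of an aliased region: dpa 0x1000..0x1fff */
		unsigned long long map_start = 0x1000, map_size = 0x1000;
		unsigned long long map_end = map_start + map_size - 1;
		unsigned long long blk_start = map_start, busy = 0;
		struct res resources[] = {
			{ "pmem-1", 0x1000, 0x13ff },	/* aliased PMEM allocation */
			{ "blk-2",  0x1800, 0x18ff },	/* existing BLK allocation */
		};
		int nres = sizeof(resources) / sizeof(resources[0]);
		int i, moved;

		/*
		 * Probe blk_start past every PMEM allocation, retrying until
		 * it stops moving, as the kernel's retry loop does.
		 */
		do {
			moved = 0;
			for (i = 0; i < nres; i++) {
				struct res *r = &resources[i];

				if (r->end < blk_start || r->start > map_end)
					continue;
				if (strncmp(r->name, "pmem", 4) == 0) {
					blk_start = r->end + 1;
					moved = 1;
				}
			}
		} while (moved);

		/* subtract BLK allocations living in the remaining space */
		for (i = 0; i < nres; i++)
			if (strncmp(resources[i].name, "blk", 3) == 0)
				busy += resources[i].end - resources[i].start + 1;

		/* 0x2000 - 0x1400 - 0x100 = 0xb00 bytes available for BLK */
		printf("available BLK dpa: %#llx\n",
		       map_end + 1 - blk_start - busy);
		return 0;
	}

The same shape appears in the diff above: alias_dpa_busy() advances blk_start past aliased PMEM, then nd_blk_available_dpa() subtracts each busy "blk" resource from the remaining space.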