summaryrefslogtreecommitdiff
path: root/drivers/md/dm-pcache/dm_pcache.h
diff options
context:
space:
mode:
author: Dongsheng Yang <dongsheng.yang@linux.dev> 2025-08-12 08:24:52 +0000
committer: Mikulas Patocka <mpatocka@redhat.com> 2025-08-25 15:25:29 +0200
commit: 1d57628ff95b32d5cfa8d8f50e07690c161e9cf0 (patch)
tree: 95776c8bc23942b2d82bee7ce7e1403fde28ecd2 /drivers/md/dm-pcache/dm_pcache.h
parent: 499cbe0f2fb0641cf07a1a8ac9f7317674295fea (diff)
dm-pcache: add persistent cache target in device-mapper
This patch introduces dm-pcache, a new DM target that places a DAX-capable
persistent-memory device in front of any slower block device and uses it as
a high-throughput, low-latency cache.

Design highlights
-----------------
- DAX data path – data is copied directly between DRAM and the pmem
  mapping, bypassing the block layer’s overhead.

- Segmented, crash-consistent layout
  - all layout metadata are dual-replicated and CRC-protected.
  - atomic kset flushes; key replay on mount guarantees cache integrity
    even after power loss.

- Striped multi-tree index
  - Multi-tree indexing for high parallelism.
  - overlap-resolution logic ensures non-intersecting cached extents.

- Background services
  - write-back worker flushes dirty keys in order, preserving
    backing-device crash consistency. This is important for checkpointing
    in cloud storage.
  - garbage collector reclaims clean segments when utilisation exceeds a
    tunable threshold.

- Data integrity – optional CRC32 on cached payload; metadata always
  protected.
Comparison with existing block-level caches
---------------------------------------------------------------------------------------------------------------------------------
| Feature                          | pcache (this patch)             | bcache                       | dm-writecache             |
|----------------------------------|---------------------------------|------------------------------|---------------------------|
| pmem access method               | DAX                             | bio (block I/O)              | DAX                       |
| Write latency (4 K rand-write)   | ~5 µs                           | ~20 µs                       | ~5 µs                     |
| Concurrency                      | multi subtree index             | global index tree            | single tree + wc_lock     |
| IOPS (4K randwrite, 32 numjobs)  | 2.1 M                           | 352 K                        | 283 K                     |
| Read-cache support               | YES                             | YES                          | NO                        |
| Deployment                       | no re-format of backend         | backend devices must be      | no re-format of backend   |
|                                  |                                 | reformatted                  |                           |
| Write-back ordering              | log-structured;                 | no ordering guarantee        | no ordering guarantee     |
|                                  | preserves app-IO-order          |                              |                           |
| Data integrity checks            | metadata + data CRC(optional)   | metadata CRC only            | none                      |
---------------------------------------------------------------------------------------------------------------------------------

Signed-off-by: Dongsheng Yang <dongsheng.yang@linux.dev>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Diffstat (limited to 'drivers/md/dm-pcache/dm_pcache.h')
-rw-r--r-- drivers/md/dm-pcache/dm_pcache.h 67
1 files changed, 67 insertions, 0 deletions
diff --git a/drivers/md/dm-pcache/dm_pcache.h b/drivers/md/dm-pcache/dm_pcache.h
new file mode 100644
index 000000000000..b4e06be0c0b9
--- /dev/null
+++ b/drivers/md/dm-pcache/dm_pcache.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _DM_PCACHE_H
+#define _DM_PCACHE_H
+#include <linux/device-mapper.h>
+
+#include "../dm-core.h"
+
+#define CACHE_DEV_TO_PCACHE(ptr) (container_of(ptr, struct dm_pcache, cache_dev)) /* pointer to the embedded cache_dev member -> enclosing struct dm_pcache; the parameter is named ptr so the member name is not macro-substituted */
+#define BACKING_DEV_TO_PCACHE(ptr) (container_of(ptr, struct dm_pcache, backing_dev)) /* pointer to the embedded backing_dev member -> enclosing struct dm_pcache */
+#define CACHE_TO_PCACHE(ptr) (container_of(ptr, struct dm_pcache, cache)) /* pointer to the embedded cache member -> enclosing struct dm_pcache */
+
+#define PCACHE_STATE_RUNNING 1 /* NOTE(review): presumably a value of dm_pcache.state; no user is visible in this header */
+#define PCACHE_STATE_STOPPING 2 /* value of dm_pcache.state that pcache_is_stopping() tests for */
+
+struct pcache_cache_dev; /* NOTE(review): these four types are only forward-declared here, yet struct dm_pcache embeds them by value, so includers must have the full definitions in scope first */
+struct pcache_backing_dev;
+struct pcache_cache;
+struct pcache_cache_options;
+struct dm_pcache { /* top-level dm-pcache object: embeds the cache device, backing device, cache and options by value */
+ struct dm_target *ti; /* DM target; the pcache_dev_* log macros read ti->table->md->name */
+ struct pcache_cache_dev cache_dev; /* member that CACHE_DEV_TO_PCACHE() maps back to this struct */
+ struct pcache_backing_dev backing_dev; /* member that BACKING_DEV_TO_PCACHE() maps back to this struct */
+ struct pcache_cache cache; /* member that CACHE_TO_PCACHE() maps back to this struct */
+ struct pcache_cache_options opts; /* NOTE(review): presumably the parsed target options - confirm */
+
+ spinlock_t defered_req_list_lock; /* NOTE(review): presumably guards defered_req_list - confirm against users */
+ struct list_head defered_req_list; /* NOTE(review): presumably links pcache_request.list_node entries - confirm */
+ struct workqueue_struct *task_wq; /* NOTE(review): presumably the workqueue defered_req_work is queued on - confirm */
+
+ struct work_struct defered_req_work; /* NOTE(review): presumably scheduled by pcache_defer_reqs_kick() - confirm */
+
+ atomic_t state; /* compared with PCACHE_STATE_STOPPING by pcache_is_stopping() */
+ atomic_t inflight_reqs; /* NOTE(review): presumably the number of requests in flight - confirm */
+ wait_queue_head_t inflight_wq; /* NOTE(review): presumably waited on until inflight_reqs drains - confirm */
+};
+
+static inline bool pcache_is_stopping(struct dm_pcache *pcache) /* true iff pcache->state currently reads PCACHE_STATE_STOPPING */
+{
+ return (atomic_read(&pcache->state) == PCACHE_STATE_STOPPING); /* one atomic_read() of the state; pcache is dereferenced, so it must be non-NULL */
+}
+
+#define pcache_dev_err(pcache, fmt, ...) \
+ pcache_err("%s " fmt, (pcache)->ti->table->md->name, ##__VA_ARGS__) /* pcache_err() with the mapped device name prepended; fmt must be a string literal (it is pasted onto "%s "); pcache is parenthesized so any pointer expression is safe */
+#define pcache_dev_info(pcache, fmt, ...) \
+ pcache_info("%s " fmt, (pcache)->ti->table->md->name, ##__VA_ARGS__) /* pcache_info() with the mapped device name prepended; same argument rules as pcache_dev_err() */
+#define pcache_dev_debug(pcache, fmt, ...) \
+ pcache_debug("%s " fmt, (pcache)->ti->table->md->name, ##__VA_ARGS__) /* pcache_debug() with the mapped device name prepended; same argument rules as pcache_dev_err() */
+
+struct pcache_request { /* one request: pairs a bio with a dm_pcache back-pointer, a refcount and a status */
+ struct dm_pcache *pcache; /* back-pointer to a dm_pcache */
+ struct bio *bio; /* bio associated with this request */
+
+ u64 off; /* NOTE(review): presumably the request offset; the unit is not visible in this header */
+ u32 data_len; /* NOTE(review): presumably the payload length; the unit is not visible in this header */
+
+ struct kref ref; /* reference count; NOTE(review): presumably managed by pcache_req_get()/pcache_req_put() */
+ int ret; /* NOTE(review): presumably the status handed to pcache_req_put() - confirm */
+
+ struct list_head list_node; /* list linkage; NOTE(review): presumably for dm_pcache.defered_req_list - confirm */
+};
+
+void pcache_req_get(struct pcache_request *pcache_req); /* defined elsewhere; NOTE(review): presumably takes a reference on pcache_req->ref - confirm */
+void pcache_req_put(struct pcache_request *pcache_req, int ret); /* defined elsewhere; NOTE(review): presumably drops a reference and records ret - confirm */
+
+void pcache_defer_reqs_kick(struct dm_pcache *pcache); /* defined elsewhere; NOTE(review): presumably schedules dm_pcache.defered_req_work - confirm */
+
+#endif /* _DM_PCACHE_H */