diff options
| author | Dongsheng Yang <dongsheng.yang@linux.dev> | 2025-08-12 08:24:52 +0000 |
|---|---|---|
| committer | Mikulas Patocka <mpatocka@redhat.com> | 2025-08-25 15:25:29 +0200 |
| commit | 1d57628ff95b32d5cfa8d8f50e07690c161e9cf0 (patch) | |
| tree | 95776c8bc23942b2d82bee7ce7e1403fde28ecd2 /drivers/md/dm-pcache/dm_pcache.h | |
| parent | 499cbe0f2fb0641cf07a1a8ac9f7317674295fea (diff) | |
dm-pcache: add persistent cache target in device-mapper
This patch introduces dm-pcache, a new DM target that places a DAX-
capable persistent-memory device in front of any slower block device and
uses it as a high-throughput, low-latency cache.
Design highlights
-----------------
- DAX data path – data is copied directly between DRAM and the pmem
mapping, bypassing the block layer’s overhead.
- Segmented, crash-consistent layout
- all layout metadata is dual-replicated and CRC-protected.
- atomic kset flushes; key replay on mount guarantees cache integrity
even after power loss.
- Striped multi-tree index
- Multi‑tree indexing for high parallelism.
- overlap-resolution logic ensures non-intersecting cached extents.
- Background services
- write-back worker flushes dirty keys in order, preserving backing-device
crash consistency. This is important for checkpointing in cloud storage.
- garbage collector reclaims clean segments when utilisation exceeds a
tunable threshold.
- Data integrity – optional CRC32 on cached payload; metadata always protected.
Comparison with existing block-level caches
---------------------------------------------------------------------------------------------------------------------------------
| Feature | pcache (this patch) | bcache | dm-writecache |
|----------------------------------|---------------------------------|------------------------------|---------------------------|
| pmem access method | DAX | bio (block I/O) | DAX |
| Write latency (4 K rand-write) | ~5 µs | ~20 µs | ~5 µs |
| Concurrency | multi subtree index | global index tree | single tree + wc_lock |
| IOPS (4K randwrite, 32 numjobs) | 2.1 M | 352 K | 283 K |
| Read-cache support | YES | YES | NO |
| Deployment | no re-format of backend | backend devices must be | no re-format of backend |
| | | reformatted | |
| Write-back ordering | log-structured; | no ordering guarantee | no ordering guarantee |
| | preserves app-IO-order | | |
| Data integrity checks | metadata + data CRC(optional) | metadata CRC only | none |
---------------------------------------------------------------------------------------------------------------------------------
Signed-off-by: Dongsheng Yang <dongsheng.yang@linux.dev>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Diffstat (limited to 'drivers/md/dm-pcache/dm_pcache.h')
| -rw-r--r-- | drivers/md/dm-pcache/dm_pcache.h | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/drivers/md/dm-pcache/dm_pcache.h b/drivers/md/dm-pcache/dm_pcache.h new file mode 100644 index 000000000000..b4e06be0c0b9 --- /dev/null +++ b/drivers/md/dm-pcache/dm_pcache.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _DM_PCACHE_H +#define _DM_PCACHE_H +#include <linux/device-mapper.h> + +#include "../dm-core.h" + +#define CACHE_DEV_TO_PCACHE(cache_dev) (container_of(cache_dev, struct dm_pcache, cache_dev)) +#define BACKING_DEV_TO_PCACHE(backing_dev) (container_of(backing_dev, struct dm_pcache, backing_dev)) +#define CACHE_TO_PCACHE(cache) (container_of(cache, struct dm_pcache, cache)) + +#define PCACHE_STATE_RUNNING 1 +#define PCACHE_STATE_STOPPING 2 + +struct pcache_cache_dev; +struct pcache_backing_dev; +struct pcache_cache; +struct pcache_cache_options; +struct dm_pcache { + struct dm_target *ti; + struct pcache_cache_dev cache_dev; + struct pcache_backing_dev backing_dev; + struct pcache_cache cache; + struct pcache_cache_options opts; + + spinlock_t defered_req_list_lock; + struct list_head defered_req_list; + struct workqueue_struct *task_wq; + + struct work_struct defered_req_work; + + atomic_t state; + atomic_t inflight_reqs; + wait_queue_head_t inflight_wq; +}; + +static inline bool pcache_is_stopping(struct dm_pcache *pcache) +{ + return (atomic_read(&pcache->state) == PCACHE_STATE_STOPPING); +} + +#define pcache_dev_err(pcache, fmt, ...) \ + pcache_err("%s " fmt, pcache->ti->table->md->name, ##__VA_ARGS__) +#define pcache_dev_info(pcache, fmt, ...) \ + pcache_info("%s " fmt, pcache->ti->table->md->name, ##__VA_ARGS__) +#define pcache_dev_debug(pcache, fmt, ...) 
\ + pcache_debug("%s " fmt, pcache->ti->table->md->name, ##__VA_ARGS__) + +struct pcache_request { + struct dm_pcache *pcache; + struct bio *bio; + + u64 off; + u32 data_len; + + struct kref ref; + int ret; + + struct list_head list_node; +}; + +void pcache_req_get(struct pcache_request *pcache_req); +void pcache_req_put(struct pcache_request *pcache_req, int ret); + +void pcache_defer_reqs_kick(struct dm_pcache *pcache); + +#endif /* _DM_PCACHE_H */ |
