From 0d8627cc936de8ea04f3cc1e6921c63fb72cc199 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 22 Oct 2025 13:41:06 +0200 Subject: blktrace: add definitions for blk_user_trace_setup2 Add definitions for a version 2 of the blk_user_trace_setup ioctl. This new ioctl will enable a different struct layout of the binary data passed to user-space when using a new version of the blktrace utility requesting the new struct layout. Reviewed-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/uapi/linux/blktrace_api.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux/blktrace_api.h') diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index 1bfb635e309b..a6958708d477 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -129,6 +129,7 @@ enum { }; #define BLKTRACE_BDEV_SIZE 32 +#define BLKTRACE_BDEV_SIZE2 64 /* * User setup structure passed with BLKTRACESETUP @@ -143,4 +144,19 @@ struct blk_user_trace_setup { __u32 pid; }; +/* + * User setup structure passed with BLKTRACESETUP2 + */ +struct blk_user_trace_setup2 { + char name[BLKTRACE_BDEV_SIZE2]; /* output */ + __u64 act_mask; /* input */ + __u32 buf_size; /* input */ + __u32 buf_nr; /* input */ + __u64 start_lba; + __u64 end_lba; + __u32 pid; + __u32 flags; /* currently unused */ + __u64 reserved[11]; +}; + #endif /* _UAPIBLKTRACE_H */ -- cgit v1.2.3 From c44347d606260f36a81f6d8415a5af33cb3015fa Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 22 Oct 2025 13:41:08 +0200 Subject: blktrace: add definitions for struct blk_io_trace2 Add definitions for the extended version of the blktrace protocol using a wider action type to be able to record new actions in the kernel. Reviewed-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Signed-off-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/uapi/linux/blktrace_api.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux/blktrace_api.h') diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index a6958708d477..9f9834d76e00 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -94,6 +94,7 @@ enum blktrace_notify { #define BLK_IO_TRACE_MAGIC 0x65617400 #define BLK_IO_TRACE_VERSION 0x07 +#define BLK_IO_TRACE2_VERSION 0x08 /* * The trace itself @@ -113,6 +114,21 @@ struct blk_io_trace { /* cgroup id will be stored here if exists */ }; +struct blk_io_trace2 { + __u32 magic; /* MAGIC << 8 | BLK_IO_TRACE2_VERSION */ + __u32 sequence; /* event number */ + __u64 time; /* in nanoseconds */ + __u64 sector; /* disk offset */ + __u32 bytes; /* transfer length */ + __u32 pid; /* who did it */ + __u64 action; /* what happened */ + __u32 device; /* device number */ + __u32 cpu; /* on what cpu did it happen */ + __u16 error; /* completion error */ + __u16 pdu_len; /* length of data after this trace */ + __u8 pad[12]; + /* cgroup id will be stored here if it exists */ +}; /* * The remap event */ -- cgit v1.2.3 From f9ee38bbf70fb20584625849a253c8652176fa66 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 22 Oct 2025 13:41:12 +0200 Subject: blktrace: add block trace commands for zone operations Add block trace commands for zone operations. These commands can only be handled with version 2 of the blktrace protocol. For version 1, warn if a command that does not fit into the 16 bits reserved for the command in this version is passed in. Reviewed-by: Martin K. Petersen Signed-off-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Signed-off-by: Jens Axboe --- include/uapi/linux/blktrace_api.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux/blktrace_api.h') diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index 9f9834d76e00..190a3c5ab0a0 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -26,11 +26,20 @@ enum blktrace_cat { BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */ BLK_TC_FUA = 1 << 15, /* fua requests */ - BLK_TC_END = 1 << 15, /* we've run out of bits! */ + BLK_TC_END_V1 = 1 << 15, /* we've run out of bits! */ + + BLK_TC_ZONE_APPEND = 1ull << 16, /* zone append */ + BLK_TC_ZONE_RESET = 1ull << 17, /* zone reset */ + BLK_TC_ZONE_RESET_ALL = 1ull << 18, /* zone reset all */ + BLK_TC_ZONE_FINISH = 1ull << 19, /* zone finish */ + BLK_TC_ZONE_OPEN = 1ull << 20, /* zone open */ + BLK_TC_ZONE_CLOSE = 1ull << 21, /* zone close */ + + BLK_TC_END_V2 = 1ull << 21, }; #define BLK_TC_SHIFT (16) -#define BLK_TC_ACT(act) ((act) << BLK_TC_SHIFT) +#define BLK_TC_ACT(act) ((u64)(act) << BLK_TC_SHIFT) /* * Basic trace actions -- cgit v1.2.3 From 1c164fcc1b08e75f1cad1532718f09cddc0ddebe Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 22 Oct 2025 13:41:13 +0200 Subject: blktrace: expose ZONE APPEND completions to blktrace Expose ZONE APPEND completions as a block trace completion action to blktrace. As tracing of zoned block commands needs the upper 32bit of the widened 64bit action, only add traces to blktrace if user-space has requested version 2 of the blktrace protocol. Reviewed-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/uapi/linux/blktrace_api.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux/blktrace_api.h') diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index 190a3c5ab0a0..289872e51fc5 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -97,6 +97,9 @@ enum blktrace_notify { #define BLK_TA_ABORT (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE)) #define BLK_TA_DRV_DATA (__BLK_TA_DRV_DATA | BLK_TC_ACT(BLK_TC_DRV_DATA)) +#define BLK_TA_ZONE_APPEND (__BLK_TA_COMPLETE |\ + BLK_TC_ACT(BLK_TC_ZONE_APPEND)) + #define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_TN_MESSAGE (__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY)) -- cgit v1.2.3 From 3f6722816a73e2017599d965683dbe71833afd7a Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 22 Oct 2025 13:41:14 +0200 Subject: blktrace: trace zone write plugging operations Trace zone write plugging operations on block devices. As tracing of zoned block commands needs the upper 32bit of the widened 64bit action, only add traces to blktrace if user-space has requested version 2 of the blktrace protocol. Reviewed-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/uapi/linux/blktrace_api.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux/blktrace_api.h') diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index 289872e51fc5..30f3d2589365 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -62,6 +62,8 @@ enum blktrace_act { __BLK_TA_REMAP, /* bio was remapped */ __BLK_TA_ABORT, /* request aborted */ __BLK_TA_DRV_DATA, /* driver-specific binary data */ + __BLK_TA_ZONE_PLUG, /* zone write plug was plugged */ + __BLK_TA_ZONE_UNPLUG, /* zone write plug was unplugged */ __BLK_TA_CGROUP = 1 << 8, /* from a cgroup*/ }; @@ -99,6 +101,9 @@ enum blktrace_notify { #define BLK_TA_ZONE_APPEND (__BLK_TA_COMPLETE |\ BLK_TC_ACT(BLK_TC_ZONE_APPEND)) +#define BLK_TA_ZONE_PLUG (__BLK_TA_ZONE_PLUG | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_ZONE_UNPLUG (__BLK_TA_ZONE_UNPLUG |\ + BLK_TC_ACT(BLK_TC_QUEUE)) #define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY)) -- cgit v1.2.3 From bc49af56eea866c34d21bf582f65b02fc8c06ec3 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 28 Oct 2025 20:34:23 -0700 Subject: blktrace: add support for REQ_OP_WRITE_ZEROES tracing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, REQ_OP_WRITE_ZEROES operations are not handled in the blktrace infrastructure, resulting in incorrect or missing operation labels in ftrace blktrace output. This manifests as write-zeroes operations appearing with incorrect labels like "N" instead of a proper "WZ" designation. This patch adds complete support for REQ_OP_WRITE_ZEROES across the blktrace infrastructure: Add BLK_TC_WRITE_ZEROES trace category in blktrace_api.h and update BLK_TC_END_V2 marker accordingly Map REQ_OP_WRITE_ZEROES to BLK_TC_WRITE_ZEROES in __blk_add_trace() to ensure proper trace event categorization Update fill_rwbs() to generate "WZ" label for write-zeroes operations in ftrace output, making them easily identifiable Add "write-zeroes" string mapping in act_to_str array for debugfs filter interface Update blk_fill_rwbs() to handle REQ_OP_WRITE_ZEROES for block layer event tracing With this fix, write-zeroes operations are now correctly traced and displayed. =========================================================== BEFORE THIS PATCH =========================================================== blkdiscard -z -o 0 -l 40960 /dev/nvme0n1 blkdiscard-3809 [030] ..... 1212.253701: block_bio_queue: 259,0 NS 0 + 80 [blkdiscard] blkdiscard-3809 [030] ..... 1212.253703: block_getrq: 259,0 NS 0 + 80 [blkdiscard] blkdiscard-3809 [030] ..... 1212.253704: block_io_start: 259,0 NS 40960 () 0 + 80 be,0,4 [blkdiscard] blkdiscard-3809 [030] ..... 1212.253704: block_plug: [blkdiscard] blkdiscard-3809 [030] ..... 1212.253706: block_unplug: [blkdiscard] 1 blkdiscard-3809 [030] ..... 1212.253706: block_rq_insert: 259,0 NS 40960 () 0 + 80 be,0,4 [blkdiscard] kworker/30:1H-566 [030] ..... 1212.253726: block_rq_issue: 259,0 NS 40960 () 0 + 80 be,0,4 [kworker/30:1H] -0 [030] d.h1. 1212.253957: block_rq_complete: 259,0 NS () 0 + 80 be,0,4 [0] -0 [030] dNh1. 1212.253960: block_io_done: 259,0 NS 0 () 0 + 0 none,0,0 [swapper/30] Trace Event Breakdown: Event | Device | Op | Sector | Sectors | Byte Size | Calculation block_bio_queue | 259,0 | NS | 0 | 80 | - | 80 × 512 = 40,960 block_getrq | 259,0 | NS | 0 | 80 | - | 80 × 512 = 40,960 block_io_start | 259,0 | NS | 0 | 80 | 40960 | Direct from trace block_rq_insert | 259,0 | NS | 0 | 80 | 40960 | Direct from trace block_rq_issue | 259,0 | NS | 0 | 80 | 40960 | Direct from trace block_rq_complete | 259,0 | NS | 0 | 80 | - | 80 × 512 = 40,960 block_io_done | 259,0 | NS | 0 | 0 | 0 | Completion (no data) Total Bytes Transferred: Sectors: 80 Bytes: 80 × 512 = 40,960 bytes =========================================================== AFTER THIS PATCH =========================================================== blkdiscard -z -o 0 -l 40960 /dev/nvme0n1 blkdiscard-2477 [020] ..... 960.989131: block_bio_queue: 259,0 WZS 0 + 80 [blkdiscard] blkdiscard-2477 [020] ..... 960.989134: block_getrq: 259,0 WZS 0 + 80 [blkdiscard] blkdiscard-2477 [020] ..... 960.989135: block_io_start: 259,0 WZS 40960 () 0 + 80 be,0,4 [blkdiscard] blkdiscard-2477 [020] ..... 960.989138: block_plug: [blkdiscard] blkdiscard-2477 [020] ..... 960.989140: block_unplug: [blkdiscard] 1 blkdiscard-2477 [020] ..... 960.989141: block_rq_insert: 259,0 WZS 40960 () 0 + 80 be,0,4 [blkdiscard] kworker/20:1H-736 [020] ..... 960.989166: block_rq_issue: 259,0 WZS 40960 () 0 + 80 be,0,4 [kworker/20:1H] -0 [020] d.h1. 960.989476: block_rq_complete: 259,0 WZS () 0 + 80 be,0,4 [0] -0 [020] dNh1. 960.989482: block_io_done: 259,0 WZS 0 () 0 + 0 none,0,0 [swapper/20] Trace Event Breakdown: Event | Device | Op | Sector | Sectors | Byte Size | Calculation block_bio_queue | 259,0 | WZS | 0 | 80 | - | 80 × 512 = 40,960 block_getrq | 259,0 | WZS | 0 | 80 | - | 80 × 512 = 40,960 block_io_start | 259,0 | WZS | 0 | 80 | 40960 | Direct from trace block_rq_insert | 259,0 | WZS | 0 | 80 | 40960 | Direct from trace block_rq_issue | 259,0 | WZS | 0 | 80 | 40960 | Direct from trace block_rq_complete | 259,0 | WZS | 0 | 80 | - | 80 × 512 = 40,960 block_io_done | 259,0 | WZS | 0 | 0 | 0 | Completion (no data) Total Bytes Transferred: Sectors: 80 Bytes: 80 × 512 = 40,960 bytes Tested with ftrace blktrace on NVMe devices using blkdiscard with the -z (write-zeroes) flag. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/uapi/linux/blktrace_api.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux/blktrace_api.h') diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index 30f3d2589365..7c092d9f3aa4 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -35,7 +35,9 @@ enum blktrace_cat { BLK_TC_ZONE_OPEN = 1ull << 20, /* zone open */ BLK_TC_ZONE_CLOSE = 1ull << 21, /* zone close */ - BLK_TC_END_V2 = 1ull << 21, + BLK_TC_WRITE_ZEROES = 1ull << 22, /* write-zeroes */ + + BLK_TC_END_V2 = 1ull << 22, }; #define BLK_TC_SHIFT (16) -- cgit v1.2.3